Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/adapters/ai_analyst.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ async def analyze_person(
clean_person.profiles = [p for p in clean_person.profiles if p.exists]


api_key = (settings.ai_api_key or "").strip()
api_key = (settings.ai_api_key.get_secret_value() if settings.ai_api_key else "").strip()
if not api_key:
# Sin API key: si es un provider local OpenAI-compatible, usamos dummy.
# En providers hosted (DeepSeek/Groq/etc.) caemos a heurístico.
Expand Down
34 changes: 22 additions & 12 deletions src/adapters/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,27 +34,29 @@
_API_PROXY_ENDPOINT = "proxy.scrapingant.com:8080"


def _build_proxy_url(settings: AppSettings) -> str | None:
def _build_proxy_url(
settings: AppSettings,
) -> tuple[str, tuple[str, str]] | tuple[None, None]:
"""Build a ScrapingAnt proxy URL from settings.

Supports two modes:

1. **Standalone proxies** (when ``proxy_username`` is set):
``http://customer-USER-country-CC:KEY@residential.scrapingant.com:8080``
``http://residential.scrapingant.com:8080`` + auth tuple
2. **API Proxy Mode** (fallback, when only ``proxy_api_key`` is set):
``http://scrapingant&browser=false&proxy_type=MODE:KEY@proxy.scrapingant.com:8080``
``http://proxy.scrapingant.com:8080`` + auth tuple

Returns an ``http://user:pass@host:port`` string suitable for
``httpx.AsyncClient(proxy=...)``, or ``None`` when proxy is disabled.
Returns ``(base_url, (username, password))`` — credentials are **never**
embedded in the URL string — or ``(None, None)`` when proxy is disabled.
"""
mode = settings.effective_proxy_mode
if not mode or not settings.proxy_api_key:
return None
return None, None

if mode not in ("residential", "datacenter"):
return None
return None, None

password = settings.proxy_api_key
password = settings.proxy_api_key.get_secret_value()

if settings.proxy_username:
# ── Standalone residential/datacenter proxy product ──
Expand All @@ -70,7 +72,7 @@ def _build_proxy_url(settings: AppSettings) -> str | None:
username += f"&proxy_country={settings.proxy_country}"
endpoint = _API_PROXY_ENDPOINT

return f"http://{username}:{password}@{endpoint}"
return f"http://{endpoint}", (username, password)


# ---------------------------------------------------------------------------
Expand All @@ -87,6 +89,10 @@ def build_async_client(
Por qué un builder:
- Centraliza timeouts/headers para que todas las fuentes se comporten igual.
- Inyecta proxy ScrapingAnt de forma transparente si está configurado.

Security: proxy credentials are passed via ``httpx.Proxy(auth=...)``
instead of being embedded in the URL, so they never leak into exception
messages, debug logs, or tracebacks.
"""

settings = settings or AppSettings()
Expand All @@ -97,14 +103,18 @@ def build_async_client(
if extra_headers:
headers.update(extra_headers)

proxy_url = _build_proxy_url(settings)
proxy_base, proxy_auth = _build_proxy_url(settings)

proxy = None
if proxy_base and proxy_auth:
proxy = httpx.Proxy(proxy_base, auth=proxy_auth)

return httpx.AsyncClient(
timeout=httpx.Timeout(settings.http_timeout_seconds),
follow_redirects=True,
headers=headers,
proxy=proxy_url,
verify=proxy_url is None, # Proxy handles TLS termination.
proxy=proxy,
verify=proxy is None, # Proxy handles TLS termination.
)


Expand Down
3 changes: 2 additions & 1 deletion src/cli/doctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ def run() -> None:
# Proxy (ScrapingAnt)
proxy_mode = settings.effective_proxy_mode
if proxy_mode:
key_masked = "****" + (settings.proxy_api_key or "")[-4:] if settings.proxy_api_key else "(none)"
raw_key = settings.proxy_api_key.get_secret_value() if settings.proxy_api_key else ""
key_masked = "****" + raw_key[-4:] if raw_key else "(none)"
country = settings.proxy_country.upper() if settings.proxy_country else "auto"
table.add_row("Proxy mode", "OK", f"{proxy_mode} (ScrapingAnt)")
table.add_row("Proxy API key", "OK", key_masked)
Expand Down
6 changes: 3 additions & 3 deletions src/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def _configure_ai_for_run(
base_url = preset["base_url"]
model = preset["model"]

key = (ai_key or "").strip() or (settings.ai_api_key or "").strip()
key = (ai_key or "").strip() or (settings.ai_api_key.get_secret_value() if settings.ai_api_key else "").strip()

# Para proveedores locales (Ollama), la key es opcional/dummy.
if provider == "ollama" and not key:
Expand Down Expand Up @@ -1526,7 +1526,7 @@ def wizard() -> None:

# Ensure AI is configured.
settings_now = AppSettings()
if not (settings_now.ai_api_key or "").strip():
if not (settings_now.ai_api_key.get_secret_value() if settings_now.ai_api_key else "").strip():
console.print(
"[yellow]Agent mode requires an AI provider.[/yellow]"
)
Expand Down Expand Up @@ -1660,7 +1660,7 @@ def wizard() -> None:
deep_analyze = Confirm.ask("Run AI analysis?", default=True)
if deep_analyze:
settings_now = AppSettings()
if not (settings_now.ai_api_key or "").strip() and settings_now.ai_base_url.startswith("https://api.deepseek"):
if not (settings_now.ai_api_key.get_secret_value() if settings_now.ai_api_key else "").strip() and settings_now.ai_base_url.startswith("https://api.deepseek"):
if Confirm.ask("No AI key configured. Configure a free-tier provider now (recommended)?", default=True):
provider = Prompt.ask(
"Provider",
Expand Down
6 changes: 3 additions & 3 deletions src/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import sys
from pathlib import Path

from pydantic import Field
from pydantic import Field, SecretStr
from pydantic_settings import BaseSettings, SettingsConfigDict

from core.domain.language import Language
Expand Down Expand Up @@ -107,7 +107,7 @@ class AppSettings(BaseSettings):
description="User-Agent para peticiones OSINT.",
)

ai_api_key: str | None = Field(
ai_api_key: SecretStr | None = Field(
default=None,
description="API key para el proveedor IA (DeepSeek compatible OpenAI).",
)
Expand Down Expand Up @@ -196,7 +196,7 @@ class AppSettings(BaseSettings):
"Auto-detected from proxy_api_key if not set explicitly."
),
)
proxy_api_key: str | None = Field(
proxy_api_key: SecretStr | None = Field(
default=None,
description="ScrapingAnt API/proxy key (password for proxy auth).",
)
Expand Down
2 changes: 1 addition & 1 deletion src/core/services/agent_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ async def run(
)

client = AsyncOpenAI(
api_key=self._settings.ai_api_key,
api_key=self._settings.ai_api_key.get_secret_value(),
base_url=self._settings.ai_base_url,
)

Expand Down
109 changes: 80 additions & 29 deletions tests/test_http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,22 @@ def test_residential_with_username(self):
proxy_username="angeldOzt2u",
proxy_mode="residential",
)
url = _build_proxy_url(settings)
assert url == (
"http://customer-angeldOzt2u:my-password"
"@residential.scrapingant.com:8080"
)
base_url, auth = _build_proxy_url(settings)
assert base_url == "http://residential.scrapingant.com:8080"
assert auth == ("customer-angeldOzt2u", "my-password")
# Key must NOT appear in the URL
assert "my-password" not in base_url

def test_datacenter_with_username(self):
settings = AppSettings(
proxy_api_key="my-password",
proxy_username="angeldOzt2u",
proxy_mode="datacenter",
)
url = _build_proxy_url(settings)
assert url == (
"http://customer-angeldOzt2u:my-password"
"@datacenter.scrapingant.com:8080"
)
base_url, auth = _build_proxy_url(settings)
assert base_url == "http://datacenter.scrapingant.com:8080"
assert auth == ("customer-angeldOzt2u", "my-password")
assert "my-password" not in base_url

def test_residential_with_country(self):
settings = AppSettings(
Expand All @@ -42,11 +41,10 @@ def test_residential_with_country(self):
proxy_mode="residential",
proxy_country="us",
)
url = _build_proxy_url(settings)
assert url == (
"http://customer-myuser-country-us:key123"
"@residential.scrapingant.com:8080"
)
base_url, auth = _build_proxy_url(settings)
assert base_url == "http://residential.scrapingant.com:8080"
assert auth == ("customer-myuser-country-us", "key123")
assert "key123" not in base_url


# ---------------------------------------------------------------------------
Expand All @@ -60,11 +58,15 @@ def test_api_mode_residential(self):
proxy_username=None,
proxy_mode="residential",
)
url = _build_proxy_url(settings)
assert url == (
"http://scrapingant&browser=false&proxy_type=residential"
":my-api-key@proxy.scrapingant.com:8080"
)
base_url, auth = _build_proxy_url(settings)
assert base_url == "http://proxy.scrapingant.com:8080"
assert auth is not None
username, password = auth
assert "scrapingant" in username
assert "proxy_type=residential" in username
assert password == "my-api-key"
# Key must NOT appear in the URL
assert "my-api-key" not in base_url

def test_api_mode_with_country(self):
settings = AppSettings(
Expand All @@ -73,9 +75,11 @@ def test_api_mode_with_country(self):
proxy_mode="residential",
proxy_country="de",
)
url = _build_proxy_url(settings)
assert "proxy_country=de" in url
assert "proxy.scrapingant.com" in url
base_url, auth = _build_proxy_url(settings)
assert "proxy.scrapingant.com" in base_url
username, _ = auth
assert "proxy_country=de" in username
assert "key" not in base_url


# ---------------------------------------------------------------------------
Expand All @@ -85,24 +89,32 @@ def test_api_mode_with_country(self):
class TestBuildProxyUrlEdgeCases:
def test_no_proxy_when_no_key(self):
settings = AppSettings(proxy_api_key=None)
assert _build_proxy_url(settings) is None
base_url, auth = _build_proxy_url(settings)
assert base_url is None
assert auth is None

def test_auto_detect_mode_from_key(self):
settings = AppSettings(proxy_api_key="test-key", proxy_mode=None)
url = _build_proxy_url(settings)
assert url is not None
assert "residential" in url
base_url, auth = _build_proxy_url(settings)
assert base_url is not None
assert auth is not None
# Auto-detected mode is "residential" — visible in the auth username
assert "residential" in auth[0]

def test_unknown_mode_returns_none(self):
settings = AppSettings(
proxy_api_key="key",
proxy_mode="unknown_mode",
)
assert _build_proxy_url(settings) is None
base_url, auth = _build_proxy_url(settings)
assert base_url is None
assert auth is None

def test_empty_key_returns_none(self):
settings = AppSettings(proxy_api_key="", proxy_mode="residential")
assert _build_proxy_url(settings) is None
base_url, auth = _build_proxy_url(settings)
assert base_url is None
assert auth is None


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -142,3 +154,42 @@ def test_explicit_mode_overrides(self):
def test_no_key_no_mode(self):
settings = AppSettings(proxy_api_key=None, proxy_mode=None)
assert settings.effective_proxy_mode is None


# ---------------------------------------------------------------------------
# SecretStr redaction (issue #27)
# ---------------------------------------------------------------------------

class TestSecretStrRedaction:
    """Checks that keys stored on ``AppSettings`` are not exposed by accident.

    Covers ``model_dump()``, ``repr()`` and the proxy base URL, and confirms
    the raw value is still reachable through ``get_secret_value()``.
    """

    def test_model_dump_does_not_expose_proxy_key(self):
        """The dumped ``proxy_api_key`` entry neither equals nor contains the raw key."""
        payload = AppSettings(proxy_api_key="super-secret-key-12345").model_dump()
        # Compare against the raw literal and against the stringified entry.
        assert payload["proxy_api_key"] != "super-secret-key-12345"
        assert "super-secret" not in str(payload["proxy_api_key"])

    def test_model_dump_does_not_expose_ai_key(self):
        """The dumped ``ai_api_key`` entry neither equals nor contains the raw key."""
        payload = AppSettings(ai_api_key="sk-my-secret-ai-key").model_dump()
        assert payload["ai_api_key"] != "sk-my-secret-ai-key"
        assert "sk-my-secret" not in str(payload["ai_api_key"])

    def test_proxy_url_does_not_contain_key(self):
        """The key is returned in the auth tuple and is absent from the base URL."""
        proxy_base, credentials = _build_proxy_url(
            AppSettings(
                proxy_api_key="LIVE_SECRET_KEY",
                proxy_username="user1",
                proxy_mode="residential",
            )
        )
        assert "LIVE_SECRET_KEY" not in proxy_base
        assert credentials[1] == "LIVE_SECRET_KEY"

    def test_secret_value_accessible_via_getter(self):
        """``get_secret_value()`` hands back the original key unchanged."""
        configured = AppSettings(proxy_api_key="my-key-value")
        assert configured.proxy_api_key.get_secret_value() == "my-key-value"

    def test_repr_does_not_expose_key(self):
        """``repr(settings)`` contains neither the proxy key nor the AI key."""
        rendered = repr(AppSettings(proxy_api_key="secret123", ai_api_key="sk-secret"))
        assert "secret123" not in rendered
        assert "sk-secret" not in rendered
Loading