Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ PORTFOLIO_DEMO_MODE=
# RATE_LIMIT_STRIPE_UNAUTH_MAX=40
# RATE_LIMIT_PROTECTED_API_MAX=80

# Optional observability + alerting
# ALERT_WEBHOOK_URL=
# ALERT_WEBHOOK_TOKEN=
# ALERT_WEBHOOK_TIMEOUT_MS=4000

# Vercel system envs (auto-set on Vercel; optional locally for fallback testing only)
# VERCEL_ENV=
# VERCEL_URL=
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
NEXT_PUBLIC_APP_URL: https://example.com
DATABASE_URL: postgresql://postgres:postgres@localhost:5432/callbackcloser?sslmode=require
DIRECT_DATABASE_URL: postgresql://postgres:postgres@localhost:5432/callbackcloser?sslmode=require
NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY: pk_test_placeholder
NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY: pk_test_Y2xlcmsuZXhhbXBsZS5jb20k
CLERK_SECRET_KEY: sk_test_placeholder
STRIPE_SECRET_KEY: sk_test_placeholder
STRIPE_WEBHOOK_SECRET: whsec_placeholder
Expand All @@ -28,7 +28,7 @@ jobs:

- uses: actions/setup-node@v4
with:
node-version: 20
node-version: 22
cache: npm

- run: npm ci
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ When a customer calls a business's Twilio number and the forwarded call is misse
- SMS compliance commands (`STOP` / `START` / `HELP`) with DB-backed opt-out state
- Call recording enabled on forwarded calls + recording metadata captured on callbacks
- Twilio webhook protection: production-enforced `X-Twilio-Signature` validation, with shared-token fallback only in non-production
- Webhook observability baseline: correlation IDs (`X-Correlation-Id`), centralized `app.error` reporting, optional alert webhook dispatch

## Local Setup

Expand Down
12 changes: 11 additions & 1 deletion RUNBOOK.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,24 @@
- Vercel runtime logs:
- API route logs for `/api/twilio/voice`, `/api/twilio/status`, `/api/twilio/sms`
- Look for structured prefixes: `twilio.voice`, `twilio.status`, `twilio.sms`, `twilio.messaging`, `twilio.webhook-auth`
- Look for centralized error events: `app.error` (includes `correlationId`, `source`, `event`, and metadata)
- Twilio Console:
- Phone Number webhook logs / Debugger
- Call Logs and Recordings
- Messaging logs
- Neon:
- Query activity / connection issues (if DB errors occur)

## Observability + Alerts

- Every Twilio and Stripe webhook response includes an `X-Correlation-Id` header.
- For incident triage, capture the correlation ID from provider delivery logs and search Vercel logs for that ID.
- Optional alert wiring:
1. Set `ALERT_WEBHOOK_URL` in Vercel (Slack/PagerDuty/incident gateway endpoint).
2. Optionally set `ALERT_WEBHOOK_TOKEN` if your endpoint requires bearer auth.
3. Optionally set `ALERT_WEBHOOK_TIMEOUT_MS` (in milliseconds; default `4000`).
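4. Redeploy and induce a safe synthetic webhook failure in non-production to confirm alert delivery. Prefer a handler failure over an invalid signature: the Stripe webhook reports `invalid_signature` events with `alert: false`, so those are not expected to dispatch an alert (confirm against `lib/observability`).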

## Common Failure Modes

- Twilio webhooks return `401`
Expand All @@ -67,4 +78,3 @@
- `DATABASE_URL` / `DIRECT_DATABASE_URL` swapped
- Missing `sslmode=require`
- `DIRECT_DATABASE_URL` accidentally using Neon pooler host

42 changes: 35 additions & 7 deletions app/api/stripe/webhook/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { NextResponse } from 'next/server';
import Stripe from 'stripe';

import { db } from '@/lib/db';
import { getCorrelationIdFromRequest, reportApplicationError, withCorrelationIdHeader } from '@/lib/observability';
import { RATE_LIMIT_STRIPE_AUTH_MAX, RATE_LIMIT_STRIPE_UNAUTH_MAX, RATE_LIMIT_WINDOW_MS } from '@/lib/rate-limit-config';
import { buildRateLimitHeaders, consumeRateLimit, getClientIpAddress } from '@/lib/rate-limit';
import { getStripe } from '@/lib/stripe';
Expand Down Expand Up @@ -85,10 +86,12 @@ async function handleCheckoutCompleted(session: Stripe.Checkout.Session) {

export async function POST(request: Request) {
const clientIp = getClientIpAddress(request);
const correlationId = getCorrelationIdFromRequest(request);
const withCorrelation = (response: NextResponse) => withCorrelationIdHeader(response, correlationId);
const signature = request.headers.get('stripe-signature');
const webhookSecret = process.env.STRIPE_WEBHOOK_SECRET;
if (!signature || !webhookSecret) {
return NextResponse.json({ error: 'Missing Stripe webhook configuration' }, { status: 400 });
return withCorrelation(NextResponse.json({ error: 'Missing Stripe webhook configuration' }, { status: 400 }));
}

const payload = await request.text();
Expand All @@ -106,16 +109,29 @@ export async function POST(request: Request) {
if (!unauthRateLimit.allowed) {
console.warn('Stripe webhook rate-limited (invalid signature burst)', {
clientIp,
correlationId,
decision: 'reject_429',
});
return NextResponse.json(
return withCorrelation(
NextResponse.json(
{ error: 'Too many invalid webhook attempts' },
{ status: 429, headers: buildRateLimitHeaders(unauthRateLimit) }
)
);
}

const message = error instanceof Error ? error.message : 'Invalid webhook signature';
return NextResponse.json({ error: message }, { status: 400 });
reportApplicationError({
source: 'stripe.webhook',
event: 'invalid_signature',
correlationId,
error,
alert: false,
metadata: {
clientIp,
},
});
return withCorrelation(NextResponse.json({ error: message }, { status: 400 }));
}

const authRateLimit = consumeRateLimit({
Expand All @@ -126,10 +142,13 @@ export async function POST(request: Request) {
if (!authRateLimit.allowed) {
console.warn('Stripe webhook rate-limited', {
clientIp,
correlationId,
eventType: event.type,
decision: 'reject_429',
});
return NextResponse.json({ error: 'Rate limit exceeded' }, { status: 429, headers: buildRateLimitHeaders(authRateLimit) });
return withCorrelation(
NextResponse.json({ error: 'Rate limit exceeded' }, { status: 429, headers: buildRateLimitHeaders(authRateLimit) })
);
}

try {
Expand Down Expand Up @@ -168,9 +187,18 @@ export async function POST(request: Request) {
break;
}
} catch (error) {
console.error('Stripe webhook handler error', error);
return NextResponse.json({ error: 'Webhook processing failed' }, { status: 500 });
reportApplicationError({
source: 'stripe.webhook',
event: 'handler_error',
correlationId,
error,
metadata: {
clientIp,
eventType: event.type,
},
});
return withCorrelation(NextResponse.json({ error: 'Webhook processing failed' }, { status: 500 }));
}

return NextResponse.json({ received: true });
return withCorrelation(NextResponse.json({ received: true }));
}
47 changes: 33 additions & 14 deletions app/api/twilio/sms/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { NextResponse } from 'next/server';

import { findBusinessByTwilioNumber } from '@/lib/business';
import { db } from '@/lib/db';
import { getCorrelationIdFromRequest, withCorrelationIdHeader } from '@/lib/observability';
import { normalizePhoneNumber } from '@/lib/phone';
import { RATE_LIMIT_TWILIO_AUTH_MAX, RATE_LIMIT_TWILIO_UNAUTH_MAX, RATE_LIMIT_WINDOW_MS } from '@/lib/rate-limit-config';
import { buildRateLimitHeaders, consumeRateLimit, getClientIpAddress } from '@/lib/rate-limit';
Expand Down Expand Up @@ -49,6 +50,8 @@ function rateLimitSmsResponse(retryAfterSeconds: number) {

export async function POST(request: Request) {
let messageSid: string | null = null;
const correlationId = getCorrelationIdFromRequest(request);
const withCorrelation = (response: Response) => withCorrelationIdHeader(response, correlationId);
try {
const formData = await request.formData();
const payload = Object.fromEntries(formData.entries()) as Record<string, string>;
Expand All @@ -64,18 +67,19 @@ export async function POST(request: Request) {
});
if (!rateLimit.allowed) {
logTwilioWarn('sms', 'webhook_unauthorized_rate_limited', {
correlationId,
eventType: 'inbound_sms',
decision: 'reject_429',
clientIp,
});
return new NextResponse(
return withCorrelation(new NextResponse(
JSON.stringify({ error: 'Too many unauthorized requests' }),
{ status: 429, headers: { 'Content-Type': 'application/json', ...buildRateLimitHeaders(rateLimit) } }
);
));
}

logTwilioWarn('sms', 'webhook_unauthorized', { decision: 'reject_401' });
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
logTwilioWarn('sms', 'webhook_unauthorized', { correlationId, decision: 'reject_401' });
return withCorrelation(NextResponse.json({ error: 'Unauthorized' }, { status: 401 }));
}

const authRateLimit = consumeRateLimit({
Expand All @@ -85,6 +89,7 @@ export async function POST(request: Request) {
});
if (!authRateLimit.allowed) {
logTwilioWarn('sms', 'webhook_rate_limited', {
correlationId,
eventType: 'inbound_sms',
decision: 'reject_429',
accountSid: accountSid || null,
Expand All @@ -94,7 +99,7 @@ export async function POST(request: Request) {
Object.entries(buildRateLimitHeaders(authRateLimit)).forEach(([name, value]) => {
response.headers.set(name, value);
});
return response;
return withCorrelation(response);
}

const to = normalizePhoneNumber(formField(formData, 'To'));
Expand All @@ -104,27 +109,30 @@ export async function POST(request: Request) {

logTwilioInfo('sms', 'webhook_received', {
messageSid,
correlationId,
eventType: 'inbound_sms',
decision: 'processing',
});

if (!to || !from) {
logTwilioWarn('sms', 'missing_required_fields', {
messageSid,
correlationId,
eventType: 'inbound_sms',
decision: 'noop_missing_to_or_from',
});
return xmlOk();
return withCorrelation(xmlOk());
}

const business = await findBusinessByTwilioNumber(to);
if (!business) {
logTwilioWarn('sms', 'business_not_found', {
messageSid,
correlationId,
eventType: 'inbound_sms',
decision: 'noop_business_not_found',
});
return xmlOk();
return withCorrelation(xmlOk());
}

const inbound = await persistInboundMessage({
Expand All @@ -148,25 +156,27 @@ export async function POST(request: Request) {
if (compliance.handled) {
logTwilioInfo('sms', 'compliance_keyword_handled', {
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
command: compliance.command,
stateChange: compliance.stateChange,
duplicateInbound: inbound.duplicate,
decision: 'reply_compliance_message',
});
return xmlOk(compliance.replyText);
return withCorrelation(xmlOk(compliance.replyText));
}

if (inbound.duplicate) {
logTwilioInfo('sms', 'duplicate_message_retry', {
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
leadId: null,
decision: 'noop_duplicate',
});
return xmlOk();
return withCorrelation(xmlOk());
}

const lead =
Expand All @@ -186,11 +196,12 @@ export async function POST(request: Request) {
if (!lead) {
logTwilioInfo('sms', 'no_matching_lead', {
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
decision: 'noop_no_lead_thread',
});
return xmlOk();
return withCorrelation(xmlOk());
}

await db.message.update({
Expand All @@ -209,6 +220,7 @@ export async function POST(request: Request) {
if (!isSubscriptionActive(business.subscriptionStatus) || lead.billingRequired || !business.twilioPhoneNumber) {
logTwilioInfo('sms', 'automation_blocked', {
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
leadId: lead.id,
Expand All @@ -218,7 +230,7 @@ export async function POST(request: Request) {
? 'noop_billing_required'
: 'noop_missing_twilio_number',
});
return xmlOk();
return withCorrelation(xmlOk());
}

const transition = advanceLeadConversation(lead, body, business);
Expand All @@ -237,6 +249,7 @@ export async function POST(request: Request) {

logTwilioInfo('sms', 'state_machine_transition', {
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
leadId: updatedLead.id,
Expand Down Expand Up @@ -272,6 +285,7 @@ export async function POST(request: Request) {
if (ownerSend.suppressed) {
logTwilioWarn('sms', 'owner_notification_suppressed', {
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
leadId: updatedLead.id,
Expand All @@ -284,6 +298,7 @@ export async function POST(request: Request) {
});
logTwilioInfo('sms', 'owner_notification_sent', {
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
leadId: updatedLead.id,
Expand All @@ -296,6 +311,7 @@ export async function POST(request: Request) {
'owner_notification_failed',
{
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
leadId: updatedLead.id,
Expand All @@ -318,6 +334,7 @@ export async function POST(request: Request) {
if (leadSend.suppressed) {
logTwilioWarn('sms', 'lead_reply_suppressed', {
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
leadId: updatedLead.id,
Expand All @@ -334,6 +351,7 @@ export async function POST(request: Request) {

logTwilioInfo('sms', 'lead_reply_sent', {
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
leadId: updatedLead.id,
Expand All @@ -346,6 +364,7 @@ export async function POST(request: Request) {
'lead_reply_send_failed',
{
messageSid,
correlationId,
eventType: 'inbound_sms',
businessId: business.id,
leadId: updatedLead.id,
Expand All @@ -355,9 +374,9 @@ export async function POST(request: Request) {
);
}

return xmlOk();
return withCorrelation(xmlOk());
} catch (error) {
logTwilioError('sms', 'route_error', { messageSid, eventType: 'inbound_sms', decision: 'return_retryable_503' }, error);
return retryableErrorResponse();
logTwilioError('sms', 'route_error', { messageSid, correlationId, eventType: 'inbound_sms', decision: 'return_retryable_503' }, error);
return withCorrelation(retryableErrorResponse());
}
}
Loading