Skip to content

Commit 4435228

Browse files
committed
test(#26): add integration tests for ScrapingBee client; fix Search() to use HTML parsing (remove broken AI path)
1 parent fb96773 commit 4435228

2 files changed

Lines changed: 127 additions & 34 deletions

File tree

backend/internal/service/kickstarter_scraping.go

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ func NewKickstarterScrapingService(apiKey string, maxConcurrent int) *Kickstarte
2525
}
2626
}
2727

28-
// Search searches for campaigns using AI extraction (6 credits per request).
28+
// Search searches for campaigns using HTML parsing (1 credit per request).
29+
// Note: AI extraction was removed — Kickstarter embeds project data in [data-project]
30+
// HTML attributes, not text nodes, so ScrapingBee AI returns EMPTY_RESPONSE for that selector.
2931
func (s *KickstarterScrapingService) Search(term, categoryID, sort, cursor string, first int) (*SearchResult, error) {
3032
ctx := context.Background()
3133

@@ -37,39 +39,8 @@ func (s *KickstarterScrapingService) Search(term, categoryID, sort, cursor strin
3739
}
3840
}
3941

40-
// Build Kickstarter discover URL with page
4142
discoverURL := s.buildDiscoverURL(term, categoryID, sort, page)
4243

43-
// Try AI extraction first; ai_selector focuses only on project cards, reducing processing time.
44-
aiQuery := "Extract all projects from this page. For each project return a JSON object with these fields: name, slug, creator_slug (the creator's URL slug, e.g. 'john-doe' from kickstarter.com/projects/john-doe/...), project_url (full canonical Kickstarter URL), goal, pledged, currency, deadline, creator, category, photo_url, blurb."
45-
aiSelector := "[data-project]"
46-
47-
aiResult, err := s.client.ExtractWithAI(ctx, discoverURL, aiQuery, aiSelector)
48-
if err == nil {
49-
campaigns, parseErr := s.parseAIResponse(aiResult)
50-
if parseErr == nil && len(campaigns) > 0 {
51-
log.Printf("AI extraction successful: found %d campaigns (page %d)", len(campaigns), page)
52-
53-
// Generate next cursor if we got a full page
54-
nextCursor := ""
55-
hasNextPage := len(campaigns) >= first
56-
if hasNextPage {
57-
nextCursor = fmt.Sprintf("page:%d", page+1)
58-
}
59-
60-
return &SearchResult{
61-
Campaigns: campaigns,
62-
TotalCount: len(campaigns),
63-
NextCursor: nextCursor,
64-
HasNextPage: hasNextPage,
65-
}, nil
66-
}
67-
log.Printf("AI extraction parse failed: %v, falling back to HTML", parseErr)
68-
} else {
69-
log.Printf("AI extraction failed: %v, falling back to HTML", err)
70-
}
71-
72-
// Fallback to HTML parsing
7344
html, err := s.client.FetchHTMLInSession(ctx, discoverURL, 0)
7445
if err != nil {
7546
return nil, fmt.Errorf("fetch HTML: %w", err)
@@ -80,9 +51,8 @@ func (s *KickstarterScrapingService) Search(term, categoryID, sort, cursor strin
8051
return nil, fmt.Errorf("parse HTML: %w", err)
8152
}
8253

83-
log.Printf("HTML parsing successful: found %d campaigns (page %d)", len(campaigns), page)
54+
log.Printf("Search: found %d campaigns for term=%q page=%d", len(campaigns), term, page)
8455

85-
// Generate next cursor if we got a full page
8656
nextCursor := ""
8757
hasNextPage := len(campaigns) >= first
8858
if hasNextPage {
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
//go:build integration
2+
3+
package service
4+
5+
import (
6+
"context"
7+
"os"
8+
"strings"
9+
"testing"
10+
)
11+
12+
// Run with:
13+
// SCRAPINGBEE_API_KEY=<key> go test -v -tags integration -run TestScrapingBee ./internal/service/
14+
// SCRAPINGBEE_API_KEY=<key> go test -v -tags integration -timeout 120s ./internal/service/
15+
16+
func testClient(t *testing.T) *ScrapingBeeClient {
17+
t.Helper()
18+
key := os.Getenv("SCRAPINGBEE_API_KEY")
19+
if key == "" {
20+
t.Fatal("SCRAPINGBEE_API_KEY env var not set")
21+
}
22+
return NewScrapingBeeClient(key, 2)
23+
}
24+
25+
func TestScrapingBee_FetchUsage(t *testing.T) {
26+
client := testClient(t)
27+
usage, err := client.FetchUsage(context.Background())
28+
if err != nil {
29+
t.Fatalf("FetchUsage error: %v", err)
30+
}
31+
t.Logf("Usage: %d/%d credits (%.1f%%), renews %s",
32+
usage.UsedCredits, usage.MaxCredits,
33+
float64(usage.UsedCredits)/float64(usage.MaxCredits)*100,
34+
usage.RenewalDate,
35+
)
36+
if usage.MaxCredits <= 0 {
37+
t.Errorf("expected positive MaxCredits, got %d", usage.MaxCredits)
38+
}
39+
}
40+
41+
// TestScrapingBee_FetchHTMLInSession costs 1 credit.
42+
func TestScrapingBee_FetchHTMLInSession(t *testing.T) {
43+
client := testClient(t)
44+
targetURL := "https://www.kickstarter.com/discover/advanced?category_id=16&sort=magic"
45+
html, err := client.FetchHTMLInSession(context.Background(), targetURL, 42)
46+
if err != nil {
47+
t.Fatalf("FetchHTMLInSession error: %v", err)
48+
}
49+
if len(html) < 1000 {
50+
t.Errorf("HTML response suspiciously short: %d bytes", len(html))
51+
}
52+
if !strings.Contains(html, "kickstarter") {
53+
t.Errorf("response does not look like a Kickstarter page")
54+
}
55+
t.Logf("FetchHTMLInSession: got %d bytes", len(html))
56+
}
57+
58+
// TestScrapingBee_DiscoverCampaigns costs 1 credit and validates HTML parsing.
59+
func TestScrapingBee_DiscoverCampaigns(t *testing.T) {
60+
key := os.Getenv("SCRAPINGBEE_API_KEY")
61+
if key == "" {
62+
t.Fatal("SCRAPINGBEE_API_KEY env var not set")
63+
}
64+
svc := NewKickstarterScrapingService(key, 2)
65+
// category_id=16 = Technology; sort=magic; page=1
66+
campaigns, err := svc.DiscoverCampaigns("16", "magic", 1, 42)
67+
if err != nil {
68+
t.Fatalf("DiscoverCampaigns error: %v", err)
69+
}
70+
if len(campaigns) == 0 {
71+
t.Fatal("expected at least one campaign, got 0")
72+
}
73+
t.Logf("DiscoverCampaigns: got %d campaigns", len(campaigns))
74+
for i, c := range campaigns {
75+
if c.PID == "" {
76+
t.Errorf("campaign[%d] has empty PID", i)
77+
}
78+
if c.ProjectURL == "" {
79+
t.Errorf("campaign[%d] has empty ProjectURL", i)
80+
}
81+
t.Logf(" [%d] %s — %s", i, c.PID, c.Name)
82+
}
83+
}
84+
85+
// TestScrapingBee_Search costs 1 credit and validates the Search() method.
86+
func TestScrapingBee_Search(t *testing.T) {
87+
key := os.Getenv("SCRAPINGBEE_API_KEY")
88+
if key == "" {
89+
t.Fatal("SCRAPINGBEE_API_KEY env var not set")
90+
}
91+
svc := NewKickstarterScrapingService(key, 2)
92+
result, err := svc.Search("keyboard", "16", "magic", "", 12)
93+
if err != nil {
94+
t.Fatalf("Search error: %v", err)
95+
}
96+
if len(result.Campaigns) == 0 {
97+
t.Fatal("expected at least one campaign, got 0")
98+
}
99+
t.Logf("Search: got %d campaigns, hasNextPage=%v", len(result.Campaigns), result.HasNextPage)
100+
for i, c := range result.Campaigns {
101+
t.Logf(" [%d] %s — %s", i, c.PID, c.Name)
102+
}
103+
}
104+
105+
// TestScrapingBee_ExtractWithAI_RawAPI documents that AI extraction returns
106+
// EMPTY_RESPONSE for [data-project] because project data is stored in HTML
107+
// attributes (not text nodes). This test is informational — it verifies the
108+
// raw API call succeeds and logs what ScrapingBee actually returns.
109+
func TestScrapingBee_ExtractWithAI_RawAPI(t *testing.T) {
110+
client := testClient(t)
111+
targetURL := "https://www.kickstarter.com/discover/advanced?category_id=16&sort=magic"
112+
// Note: [data-project] holds JSON in an attribute, not text — AI sees no text to extract.
113+
result, err := client.ExtractWithAI(context.Background(), targetURL,
114+
"Extract all project names visible on the page.", "h2 a")
115+
if err != nil {
116+
t.Fatalf("ExtractWithAI error: %v", err)
117+
}
118+
preview := result
119+
if len(preview) > 300 {
120+
preview = preview[:300]
121+
}
122+
t.Logf("ExtractWithAI (h2 a selector) result (%d bytes): %s", len(result), preview)
123+
}

0 commit comments

Comments
 (0)