-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpython-batch.py
More file actions
125 lines (102 loc) · 4.06 KB
/
python-batch.py
File metadata and controls
125 lines (102 loc) · 4.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""Vinted Smart Scraper — Python batch example.

Runs 26 per-country scrapes (25 EU markets plus the UK) in parallel using the
Apify client async API, deduplicates items by `vintedId`, ranks same-title
listings by arbitrage spread, and writes the top 100 to a CSV.

Setup:
    pip install apify-client
    export APIFY_TOKEN="<your-token>"
    python examples/python-batch.py "vintage carhartt"

Cost: ~$0.018 × 26 starts + $0.0005 × ~5000 results ≈ $2.97 per full sweep.
"""
from __future__ import annotations
import asyncio
import csv
import os
import sys
from pathlib import Path
from apify_client import ApifyClientAsync
# Country codes swept by the batch run; each code gets its own actor start.
# Kept as a whitespace-separated string so the 26 codes read as one table.
EU_COUNTRIES = (
    "fr es it de be lu nl pt at "
    "ie fi se dk lt lv ee cz sk "
    "pl hu ro bg hr si gr uk"
).split()
async def scrape_country(client: ApifyClientAsync, country: str, query: str) -> list[dict]:
    """Run the actor for a single country and return its dataset items.

    Best-effort by design: any failure is reported on stdout and turned into
    an empty list, so one failing country cannot abort a sweep that awaits
    many of these calls together.

    Args:
        client: Apify async client used to start the actor and read its dataset.
        country: Country code sent as the only entry of the actor's
            `countries` input.
        query: Search text forwarded to the actor as `searchText`.

    Returns:
        The items of the run's default dataset, or `[]` if the run could not
        be started, returned no run record, or its dataset could not be read.
    """
    try:
        run = await client.actor("kazkn/vinted-smart-scraper").call(
            run_input={
                "searchText": query,
                "countries": [country],
                "maxResults": 200,
                "currency": "EUR",
            }
        )
        # `call()` may hand back no run record; report that plainly instead
        # of through the TypeError that subscripting None would raise.
        if not run:
            print(f" {country}: FAILED (actor call returned no run)")
            return []
        dataset = client.dataset(run["defaultDatasetId"])
        items = [item async for item in dataset.iterate_items()]
    except Exception as e:  # deliberate catch-all, see docstring
        print(f" {country}: FAILED ({e})")
        return []
    print(f" {country}: {len(items)} items")
    return items
def _has_usable_price(item: dict) -> bool:
    """Return True when the item's `priceEur` is a number greater than zero."""
    price = item.get("priceEur")
    # bool is an int subclass; a True/False "price" is not a price.
    return (
        isinstance(price, (int, float))
        and not isinstance(price, bool)
        and price > 0
    )


def _dedupe_cheapest(items: list[dict]) -> list[dict]:
    """Collapse listings sharing a `vintedId`/`id`, keeping the cheapest one.

    Expects every item to carry a comparable `priceEur`.
    """
    by_id: dict[str, dict] = {}
    unidentified: list[dict] = []
    for item in items:
        raw_id = item.get("vintedId") or item.get("id")
        if not raw_id:
            # Nothing to compare on: keep the listing rather than merging
            # every id-less item under one shared "None" key.
            unidentified.append(item)
            continue
        vid = str(raw_id)
        if vid not in by_id or item["priceEur"] < by_id[vid]["priceEur"]:
            by_id[vid] = item
    return list(by_id.values()) + unidentified


def _rank_spreads(items: list[dict]) -> list[dict]:
    """Group listings by normalized title and rank the groups by price spread.

    Listings are matched on stripped, lower-cased title only. For each title
    seen at least twice, the spread is (dearest - cheapest) / cheapest; groups
    with no positive spread are dropped. Expects every item to carry a
    positive numeric `priceEur`.

    Returns:
        One row per title, sorted by `spread_pct` descending.
    """
    by_title: dict[str, list[dict]] = {}
    for item in items:
        key = (item.get("title") or "").strip().lower()
        if key:
            by_title.setdefault(key, []).append(item)

    ranked = []
    for title, group in by_title.items():
        if len(group) < 2:
            continue
        ordered = sorted(group, key=lambda i: i["priceEur"])
        cheapest, dearest = ordered[0], ordered[-1]
        spread = (dearest["priceEur"] - cheapest["priceEur"]) / cheapest["priceEur"]
        if spread <= 0:
            continue
        ranked.append({
            "title": title,
            # .get(): a listing without a country should not abort the report.
            "buy_country": cheapest.get("country"),
            "buy_price_eur": round(cheapest["priceEur"], 2),
            "sell_country": dearest.get("country"),
            "sell_price_eur": round(dearest["priceEur"], 2),
            "spread_pct": round(spread * 100, 1),
            "buy_url": cheapest.get("permalink"),
            "sell_url": dearest.get("permalink"),
        })
    ranked.sort(key=lambda r: r["spread_pct"], reverse=True)
    return ranked


async def main(query: str) -> None:
    """Sweep every country for `query` and write the top spreads to a CSV.

    Scrapes all codes in `EU_COUNTRIES` concurrently, drops items without a
    usable price, deduplicates by listing id, ranks same-title listings by
    price spread, and writes up to 100 rows to
    `vinted-arbitrage-<query>.csv` in the current directory.

    Args:
        query: Search text sent to the actor; also used in the file name.

    Raises:
        SystemExit: If the `APIFY_TOKEN` environment variable is not set.
    """
    token = os.environ.get("APIFY_TOKEN")
    if not token:
        sys.exit("Set APIFY_TOKEN env var first")
    client = ApifyClientAsync(token)

    print(f"Sweeping {len(EU_COUNTRIES)} countries for: {query}")
    print("This will take ~30-60 seconds (parallel, capped by your Apify concurrency).\n")
    all_items_per_country = await asyncio.gather(
        *(scrape_country(client, c, query) for c in EU_COUNTRIES)
    )

    scraped = [item for items in all_items_per_country for item in items]
    # A missing, non-numeric or zero price would otherwise raise (KeyError,
    # TypeError, ZeroDivisionError) after the paid sweep has already run.
    priced = [item for item in scraped if _has_usable_price(item)]
    deduped = _dedupe_cheapest(priced)
    print(f"\nDeduped: {len(scraped)} → {len(deduped)} items")
    if len(priced) < len(scraped):
        print(f"Skipped {len(scraped) - len(priced)} items without a usable priceEur")

    top = _rank_spreads(deduped)[:100]

    # Fixed column list so an empty report has the same header as a full one.
    fieldnames = [
        "title",
        "buy_country",
        "buy_price_eur",
        "sell_country",
        "sell_price_eur",
        "spread_pct",
        "buy_url",
        "sell_url",
    ]
    # Path separators in the query would point the file into another directory.
    safe_query = query.replace(" ", "_").replace("/", "_").replace("\\", "_")
    out = Path(f"vinted-arbitrage-{safe_query}.csv")
    # Explicit UTF-8: titles contain non-ASCII text and the platform default
    # encoding may not be able to represent it.
    with out.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(top)
    print(f"\nTop 100 spreads written to {out}")

    if top:
        print("\n#1 spread:")
        for k, v in top[0].items():
            print(f" {k}: {v}")
# Script entry point: the first CLI argument is the search text; with no
# argument the demo query is used.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    query = cli_args[0] if cli_args else "vintage carhartt"
    asyncio.run(main(query))