diff --git a/README.md b/README.md index ea58cd6..e806f5e 100644 --- a/README.md +++ b/README.md @@ -71,12 +71,44 @@ Keyword blacklist in `config/default.json`: ## Data Model -`data/listings.db` is created automatically. +### Table `listings` +One row per unique listing, deduplicated by provider ID. Stores all scraped content plus user state (`is_seen`, `is_favorite`, `is_blacklisted`). Tracks when a listing was first and last seen and its position in the most recent scrape. -- **`listings`** – listings with price, size, address, timestamps and flags (`is_seen`, `is_favorite`, `is_blacklisted`) -- **`search_configs`** – search agent configurations -- **`scrape_runs`** – run history per agent -- **`blacklist`** – permanent exclusions by ID or URL +``` +id · source · title · price · size · rooms · address · description · publisher +link · image · images · provider · listing_type +is_seen · is_favorite · is_blacklisted · blacklisted_at · favorited_at +first_seen · last_seen · listed_at · available_from · scrape_rank +``` + +### Table `search_configs` +One row per search agent. Defines provider, listing type, page limit, search URL, and enabled state. + +``` +id · name · provider · listing_type · max_pages · extra_params · enabled · created_at +``` + +### Table `listing_agents` +Junction table linking listings to the agents that found them (n:m). Records the rank within that agent's run and which run last actively scraped the listing. + +``` +listing_id · search_config_id · scrape_rank · last_scraped_run_id +``` + +### Table `scrape_runs` +Log of every scrape execution with timing, counts, and error info. + +``` +id · source · provider · listing_type · search_config_id +started_at · ended_at · status · new_count · total_count · error +``` + +### Table `blacklist` +Blocked listing IDs or URLs, including keyword-based blocks that have no matching listing row. + +``` +id · listing_id · url · created_at +``` --- @@ -91,11 +123,11 @@ src/ Express backend server.js Entry point, middleware, routes routes/ listings, scraper, configs scrapers/engine.js Playwright runner + CSS selector config - providers/ Adapter registry + Kleinanzeigen implementation + providers/ Adapter registry + provider implementations services/ Scrape orchestration per agent db/database.js node:sqlite – schema, migrations, upserts -config/default.json Keyword & neighborhood blacklist +config/default.json Global blacklist keyword config data/listings.db SQLite file (auto-created) ``` @@ -104,16 +136,41 @@ data/listings.db SQLite file (auto-created) ## API ``` -GET /api/listings Fetch listings (filter via query params) -PATCH /api/listings/:id/seen Mark as seen -PATCH /api/listings/:id/favorite Toggle favorite -POST /api/listings/:id/blacklist Blacklist listing -DELETE /api/listings/:id/blacklist Remove from blacklist -POST /api/scrape Scrape all active agents -POST /api/scrape/:configId Scrape a single agent -GET /api/configs Get agents -POST /api/configs Create agent -GET /api/providers List available providers +GET /api/listings Fetch listings (filter via query params) +GET /api/listings/stats Aggregate listing counters +GET /api/listings/stats/per-config Listing counters per agent + orphan stats +GET /api/listings/runs Recent scrape runs +PATCH /api/listings/seen-all Mark all listings as seen +PATCH /api/listings/:id/seen Mark a listing as seen +PATCH /api/listings/:id/unseen Mark a listing as unseen +PATCH /api/listings/:id/favorite Toggle favorite +POST /api/listings/:id/blacklist Blacklist a listing +DELETE /api/listings/:id/blacklist Remove listing from blacklist +DELETE /api/listings/reset Delete unpinned listings +DELETE /api/listings/reset/:configId Delete unpinned listings for one agent +DELETE /api/listings/clear-favorites Clear all favorites +DELETE /api/listings/clear-favorites/:configId + Clear favorites for one agent +DELETE /api/listings/clear-blacklist Clear blacklist flags +DELETE /api/listings/clear-blacklist/:configId + Clear blacklist flags for one agent +GET /api/listings/:id/images Fetch or return cached gallery images +POST /api/listings/batch-images Batch image fetch for listing cards + +GET /api/configs Get agents +POST /api/configs Create agent +PATCH /api/configs/:id Update agent +DELETE /api/configs/:id Delete agent +POST /api/configs/infer-url Infer provider + listing type from URL + +POST /api/scrape Scrape all active agents +POST /api/scrape/:configId Scrape a single enabled agent +POST /api/scrape/stop Cancel a running scrape +GET /api/scrape/status Current scrape progress +GET /api/scrape/config Read global scrape config +PATCH /api/scrape/config Update global scrape config + +GET /api/providers List available providers ``` --- diff --git a/client/src/App.jsx b/client/src/App.jsx index c1661cd..4a92697 100644 --- a/client/src/App.jsx +++ b/client/src/App.jsx @@ -18,21 +18,6 @@ import { api } from './api.js'; import { TABS, ITEMS_PER_PAGE, LISTING_TYPE_LABELS, LISTING_TYPE_COLORS, PROVIDER_COLORS, PROVIDER_LABELS } from './constants.js'; const FILTERS_STORAGE_KEY = 'immo.filters.v1'; -const EMPTY_STATS = { total: 0, unseen: 0, favorites: 0, blacklisted: 0 }; - -function buildStatsFromListings(list) { - const stats = { ...EMPTY_STATS }; - for (const listing of list) { - if (listing.is_blacklisted) { - stats.blacklisted += 1; - continue; - } - stats.total += 1; - if (!listing.is_seen) stats.unseen += 1; - if (listing.is_favorite) stats.favorites += 1; - } - return stats; -} function readPersistedFilters() { const defaults = { @@ -69,7 +54,7 @@ export default function App() { }, []); const { - listings, loading, orphanStats, + listings, loading, stats, orphanStats, configStats, loadListings, loadStats, loadConfigStats, handleSeen, handleFavorite, handleBlacklist, handleUnblacklist, handleMarkAllSeen, handleReset, handleResetConfig, @@ -142,19 +127,24 @@ export default function App() { /* config selection handler – always resets tab to ALL when switching */ const handleSelectConfig = useCallback((configId) => { setActiveConfigId(configId ?? null); - currentListingParamsRef.current = { include_blacklisted: true }; + const params = { include_blacklisted: true }; + if (configId) params.search_config_id = configId; + currentListingParamsRef.current = params; + loadListings(params); setActiveTab(TABS.ALL); setPage(1); - }, []); + }, [loadListings]); /* navigate home: deselect agent, reset to unseen tab */ const handleNavigateHome = useCallback(() => { setActiveConfigId(null); - currentListingParamsRef.current = { include_blacklisted: true }; + const params = { include_blacklisted: true }; + currentListingParamsRef.current = params; + loadListings(params); setActiveTab(TABS.UNSEEN); setPage(1); setSearchQuery(''); setMinPrice(''); setMaxPrice(''); setMinSize(''); setMinRooms(''); setListingTypeFilter(''); setProviderFilter(''); setPublisherFilter(''); setMaxAvailableFrom(''); - }, []); + }, [loadListings]); const isProviderFilterActive = !activeConfigId && activeTab === TABS.ALL; @@ -183,10 +173,9 @@ export default function App() { }, [listings, listingTypeFilter, publisherFilter, searchQuery, minPrice, maxPrice, minSize, minRooms, maxAvailableFrom, scrapeConfig]); const filteredBase = useMemo(() => { - if (activeConfigId) return uiFilteredListings.filter(l => l.search_config_id === activeConfigId); if (isProviderFilterActive && providerFilter) return uiFilteredListings.filter(l => l.provider === providerFilter); return uiFilteredListings; - }, [uiFilteredListings, activeConfigId, isProviderFilterActive, providerFilter]); + }, [uiFilteredListings, isProviderFilterActive, providerFilter]); const filtered = useMemo(() => { let list = [...filteredBase]; @@ -212,13 +201,17 @@ export default function App() { }, [filteredBase, activeTab]); const tabCounts = useMemo(() => { + let base = listings; + if (listingTypeFilter) base = base.filter(l => l.listing_type === listingTypeFilter); + if (isProviderFilterActive && providerFilter) base = base.filter(l => l.provider === providerFilter); + const nonBlacklisted = base.filter(l => !l.is_blacklisted); return { - [TABS.ALL]: filteredBase.filter(l => !l.is_blacklisted).length, - [TABS.UNSEEN]: filteredBase.filter(l => !l.is_blacklisted && !l.is_seen).length, - [TABS.FAVORITES]: filteredBase.filter(l => !l.is_blacklisted && l.is_favorite).length, - [TABS.BLACKLISTED]: filteredBase.filter(l => l.is_blacklisted).length, + [TABS.ALL]: nonBlacklisted.length, + [TABS.UNSEEN]: nonBlacklisted.filter(l => !l.is_seen).length, + [TABS.FAVORITES]: nonBlacklisted.filter(l => l.is_favorite).length, + [TABS.BLACKLISTED]: base.filter(l => l.is_blacklisted).length, }; - }, [filteredBase]); + }, [listings, listingTypeFilter, isProviderFilterActive, providerFilter]); const activeConfigStats = useMemo(() => ({ total: tabCounts[TABS.ALL], @@ -227,30 +220,6 @@ export default function App() { blacklisted: tabCounts[TABS.BLACKLISTED], }), [tabCounts]); - const sidebarConfigStats = useMemo(() => { - const map = {}; - for (const cfg of configs) map[cfg.id] = { ...EMPTY_STATS }; - for (const listing of uiFilteredListings) { - const configId = listing.search_config_id; - if (configId == null) continue; - if (!map[configId]) map[configId] = { ...EMPTY_STATS }; - const bucket = map[configId]; - if (listing.is_blacklisted) { - bucket.blacklisted += 1; - continue; - } - bucket.total += 1; - if (!listing.is_seen) bucket.unseen += 1; - if (listing.is_favorite) bucket.favorites += 1; - } - return map; - }, [configs, uiFilteredListings]); - - const sidebarGlobalStats = useMemo(() => ( - buildStatsFromListings(uiFilteredListings) - ), [uiFilteredListings]); - - const pages = Math.max(1, Math.ceil(filtered.length / ITEMS_PER_PAGE)); const safePage = Math.min(page, pages); const paginated = filtered.slice((safePage - 1) * ITEMS_PER_PAGE, safePage * ITEMS_PER_PAGE); @@ -301,9 +270,9 @@ export default function App() { setSidebarOpen(false)} onReset={() => handleReset(currentListingParamsRef.current)} showToast={showToast} - onSaved={loadScrapeConfig} + onSaved={() => { loadScrapeConfig(); reloadAll(); }} onClearFavorites={() => askConfirm({ title: 'Alle Favoriten löschen?', diff --git a/client/src/components/FilterBar.jsx b/client/src/components/FilterBar.jsx index fff61e2..8108f86 100644 --- a/client/src/components/FilterBar.jsx +++ b/client/src/components/FilterBar.jsx @@ -3,12 +3,12 @@ import { TABS, LISTING_TYPE_LABELS, PROVIDER_LABELS } from '../constants.js'; export default function FilterBar({ activeTab, stats, listingTypeFilter, searchQuery, minPrice, maxPrice, minSize, minRooms, - publisherFilter, providerFilter, providers, showProviderFilter, + publisherFilter, providerFilter, providers, showProviderFilter, showListingTypeFilter = true, tabCounts, maxAvailableFrom, onTabChange, onListingTypeChange, onSearch, onMinPrice, onMaxPrice, onMinSize, onMinRooms, onPublisherFilter, onProviderFilter, onMaxAvailableFrom, onReset, }) { - const hasFilters = searchQuery || minPrice || maxPrice || minSize || minRooms || publisherFilter || maxAvailableFrom || (showProviderFilter && providerFilter); + const hasFilters = searchQuery || minPrice || maxPrice || minSize || minRooms || publisherFilter || maxAvailableFrom || (showProviderFilter && providerFilter) || (showListingTypeFilter && listingTypeFilter); const tabs = [ { id: TABS.ALL, label: 'Alle', count: stats.total, icon: ( @@ -48,9 +48,11 @@ export default function FilterBar({
- + {showListingTypeFilter && ( + + )} {showProviderFilter && (