diff --git a/src/scrapers/imdb/CMakeLists.txt b/src/scrapers/imdb/CMakeLists.txt index ed071b41b0..50c22847c0 100644 --- a/src/scrapers/imdb/CMakeLists.txt +++ b/src/scrapers/imdb/CMakeLists.txt @@ -1,6 +1,6 @@ add_library( - mediaelch_scraper_imdb_api OBJECT ImdbApi.cpp ImdbReferencePage.cpp - ImdbJsonParser.cpp ImdbSearchPage.cpp + mediaelch_scraper_imdb_api OBJECT ImdbApi.cpp ImdbJsonParser.cpp + ImdbSearchPage.cpp ) target_link_libraries( diff --git a/src/scrapers/imdb/ImdbApi.cpp b/src/scrapers/imdb/ImdbApi.cpp index 7a268636bd..1d9a95d351 100644 --- a/src/scrapers/imdb/ImdbApi.cpp +++ b/src/scrapers/imdb/ImdbApi.cpp @@ -1,13 +1,13 @@ #include "ImdbApi.h" -#include "Version.h" #include "log/Log.h" #include "network/NetworkRequest.h" +#include "scrapers/imdb/ImdbGraphQLQueries.h" #include "utils/Meta.h" +#include #include #include -#include #include #include #include @@ -29,17 +29,40 @@ bool ImdbApi::isInitialized() const return true; } -void ImdbApi::sendGetRequest(const Locale& locale, const QUrl& url, ImdbApi::ApiCallback callback) +QUrl ImdbApi::makeFullUrl(const QString& suffix) +{ + MediaElch_Debug_Expects(suffix.startsWith('/')); + return {"https://www.imdb.com" + suffix}; +} + +QUrl ImdbApi::makeFullAssetUrl(const QString& suffix) +{ + return {"https://www.imdb.com" + suffix}; +} + +QUrl ImdbApi::makeSuggestUrl(const QString& query) +{ + // The Suggest API uses the first character of the query as a path segment. + // e.g. "inception" -> https://v3.sg.media-imdb.com/suggestion/x/inception.json + // The path letter doesn't seem to matter, so we use 'x' for simplicity. 
+ QString normalized = query.toLower().trimmed(); + normalized = QString(QUrl::toPercentEncoding(normalized)); + return QUrl(QStringLiteral("https://v3.sg.media-imdb.com/suggestion/x/%1.json").arg(normalized)); +} + +QUrl ImdbApi::makeGraphQLUrl() { + return QUrl(QStringLiteral("https://graphql.imdb.com/")); +} + +void ImdbApi::suggestSearch(const QString& query, ImdbApi::ApiCallback callback) +{ + const QUrl url = makeSuggestUrl(query); QNetworkRequest request = mediaelch::network::requestWithDefaults(url); - addHeadersToRequest(locale, request); - // The IMDB does not accept requests with the MediaElch HTTP request user agent mediaelch::network::useFirefoxUserAgent(request); if (m_network.cache().hasValidElement(request)) { - // Do not immediately run the callback because classes higher up may - // set up a Qt connection while the network request is running. - QTimer::singleShot(0, this, [cb = std::move(callback), element = m_network.cache().getElement(request)]() { // + QTimer::singleShot(0, this, [cb = std::move(callback), element = m_network.cache().getElement(request)]() { cb(element, {}); }); return; @@ -47,138 +70,106 @@ void ImdbApi::sendGetRequest(const Locale& locale, const QUrl& url, ImdbApi::Api QNetworkReply* reply = m_network.getWithWatcher(request); - connect(reply, &QNetworkReply::finished, this, [reply, cb = std::move(callback), request, locale, this]() { + connect(reply, &QNetworkReply::finished, this, [reply, cb = std::move(callback), request, this]() { auto dls = makeDeleteLaterScope(reply); - QString html; + QString data; if (reply->error() == QNetworkReply::NoError) { - html = QString::fromUtf8(reply->readAll()); - - if (!html.isEmpty()) { - m_network.cache().addElement(request, html); + data = QString::fromUtf8(reply->readAll()); + if (!data.isEmpty()) { + m_network.cache().addElement(request, data); } } else { - qCWarning(generic) << "[ImdbTv][Api] Network Error:" << reply->errorString() << "for URL" << reply->url(); + qCWarning(generic) << 
"[ImdbApi] Suggest API Network Error:" << reply->errorString() << "for URL" + << reply->url(); } - ScraperError error = makeScraperError(html, *reply, {}); - cb(html, error); + ScraperError error = makeScraperError(data, *reply, {}); + cb(data, error); }); } -void ImdbApi::searchForShow(const Locale& locale, const QString& query, ImdbApi::ApiCallback callback) -{ - sendGetRequest(locale, makeShowSearchUrl(query), std::move(callback)); -} - -void ImdbApi::searchForMovie(const Locale& locale, - const QString& query, - bool includeAdult, +void ImdbApi::sendGraphQLRequest(const QString& query, + const QJsonObject& variables, ImdbApi::ApiCallback callback) { - sendGetRequest(locale, makeMovieSearchUrl(query, includeAdult), std::move(callback)); -} - -void mediaelch::scraper::ImdbApi::loadTitle(const Locale& locale, - const ImdbId& movieId, - PageKind page, - ImdbApi::ApiCallback callback) -{ - sendGetRequest(locale, makeTitleUrl(movieId, page), callback); -} - -void ImdbApi::loadDefaultEpisodesPage(const Locale& locale, const ImdbId& showId, ImdbApi::ApiCallback callback) -{ - sendGetRequest(locale, makeDefaultEpisodesUrl(showId), callback); -} + QJsonObject body; + body["query"] = query; + if (!variables.isEmpty()) { + body["variables"] = variables; + } + const QByteArray postData = QJsonDocument(body).toJson(QJsonDocument::Compact); + + // The WebsiteCache keys by URL only (QMap). + // Since all GraphQL requests go to the same URL, we append a hash of the POST body + // as a query parameter to create unique cache keys. 
+ QUrl cacheUrl = makeGraphQLUrl(); + QUrlQuery cacheQuery; + cacheQuery.addQueryItem("_body", + QString::fromLatin1(QCryptographicHash::hash(postData, QCryptographicHash::Md5).toHex())); + cacheUrl.setQuery(cacheQuery); + + QNetworkRequest cacheRequest = mediaelch::network::jsonRequestWithDefaults(cacheUrl); + + if (m_network.cache().hasValidElement(cacheRequest)) { + QTimer::singleShot( + 0, this, [cb = std::move(callback), element = m_network.cache().getElement(cacheRequest)]() { + cb(element, {}); + }); + return; + } -void ImdbApi::loadSeason(const Locale& locale, const ImdbId& showId, SeasonNumber season, ImdbApi::ApiCallback callback) -{ - sendGetRequest(locale, makeSeasonUrl(showId, season), callback); -} + // The actual request goes to the real URL (without the cache query parameter) + QNetworkRequest request = mediaelch::network::jsonRequestWithDefaults(makeGraphQLUrl()); + mediaelch::network::useFirefoxUserAgent(request); -void ImdbApi::addHeadersToRequest(const Locale& locale, QNetworkRequest& request) -{ - request.setRawHeader("Accept-Language", locale.toString('-').toLocal8Bit()); + QNetworkReply* reply = m_network.postWithWatcher(request, postData); - QNetworkCookie languageCookie("lc-main", locale.toString('_').toLocal8Bit()); - QList cookies{{languageCookie}}; - request.setHeader(QNetworkRequest::CookieHeader, QVariant::fromValue(cookies)); -} + connect(reply, &QNetworkReply::finished, this, [reply, cb = std::move(callback), cacheRequest, this]() { + auto dls = makeDeleteLaterScope(reply); -QUrl ImdbApi::makeTitleUrl(const ImdbId& id, PageKind page) const -{ - const QString pageStr = [page]() { - switch (page) { - case PageKind::Main: return ""; - case PageKind::Reference: return "reference"; - case PageKind::PlotSummary: return "plotsummary"; - case PageKind::ReleaseInfo: return "releaseinfo"; - case PageKind::Keywords: return "keywords"; - case PageKind::Episodes: return "episodes"; + QString data; + if (reply->error() == QNetworkReply::NoError) { 
+ data = QString::fromUtf8(reply->readAll()); + if (!data.isEmpty()) { + m_network.cache().addElement(cacheRequest, data); + } + } else { + qCWarning(generic) << "[ImdbApi] GraphQL Network Error:" << reply->errorString() << "for URL" + << reply->url(); } - qCCritical(generic, "[ImdbApi] Unhandled page key!"); - return ""; - }(); - return makeFullUrl(QStringLiteral("/title/%1/%2").arg(id.toString(), pageStr)); -} - -QUrl ImdbApi::makeMovieSearchUrl(const QString& searchStr, bool includeAdult) const -{ - // e.g. https://www.imdb.com/de/search/title/?title=finding%20dori&title_type=feature,tv_movie,short,video,tv_short - QUrlQuery queries; - if (includeAdult) { - queries.addQueryItem("adult", "include"); - } - queries.addQueryItem("title", searchStr); - queries.addQueryItem("title_type", "feature,tv_movie,short,video,tv_short"); // Movie categories - queries.addQueryItem("view", "simple"); - queries.addQueryItem("count", "100"); - return makeFullUrl("/search/title/?" + queries.toString()); -} - -QUrl ImdbApi::makeFullUrl(const QString& suffix) -{ - MediaElch_Debug_Expects(suffix.startsWith('/')); - return {"https://www.imdb.com" + suffix}; -} -QUrl ImdbApi::makeFullAssetUrl(const QString& suffix) -{ - return {"https://www.imdb.com" + suffix}; + ScraperError error = makeScraperError(data, *reply, {}); + cb(data, error); + }); } -QUrl ImdbApi::makeShowSearchUrl(const QString& searchStr) const +void ImdbApi::loadTitleViaGraphQL(const ImdbId& id, ImdbApi::ApiCallback callback) { - if (ImdbId::isValidFormat(searchStr)) { - return makeFullUrl(QStringLiteral("/title/") + searchStr + '/'); - } - - // e.g. https://www.imdb.com/search/title/?title=Family%20Guy&title_type=tv_series,tv_miniseries&view=simple - // An alternative (if it breaks in the future) would be - // e.g. 
https://www.imdb.com/find/?q=scrubs&s=tt&ttype=tv&ref_=fn_tv - QUrlQuery queries; - queries.addQueryItem("title", searchStr); - queries.addQueryItem("title_type", "tv_series,tv_miniseries"); - queries.addQueryItem("view", "simple"); - queries.addQueryItem("count", "100"); - return makeFullUrl("/search/title/?" + queries.toString()); + QJsonObject variables; + variables["id"] = id.toString(); + sendGraphQLRequest(ImdbGraphQLQueries::TITLE_DETAILS, variables, std::move(callback)); } -QUrl ImdbApi::makeSeasonUrl(const ImdbId& showId, SeasonNumber season) const +void ImdbApi::loadEpisodesViaGraphQL(const ImdbId& showId, int limit, ImdbApi::ApiCallback callback) { - // e.g. https://www.imdb.com/title/tt0096697/episodes/?season=10 - QUrlQuery queries; - queries.addQueryItem("season", season.toString()); - return makeFullUrl(QStringLiteral("/title/") + showId.toString() + // - QStringLiteral("/episodes?") + queries.toString()); + QJsonObject variables; + variables["id"] = showId.toString(); + variables["first"] = limit; + sendGraphQLRequest(ImdbGraphQLQueries::SEASON_EPISODES, variables, std::move(callback)); } -QUrl ImdbApi::makeDefaultEpisodesUrl(const ImdbId& showId) const +void ImdbApi::loadSeasonEpisodesViaGraphQL(const ImdbId& showId, + int season, + int limit, + ImdbApi::ApiCallback callback) { - return makeTitleUrl(showId, PageKind::Episodes); + QJsonObject variables; + variables["id"] = showId.toString(); + variables["first"] = limit; + variables["season"] = QString::number(season); + sendGraphQLRequest(ImdbGraphQLQueries::SEASON_EPISODES_FILTERED, variables, std::move(callback)); } - } // namespace scraper } // namespace mediaelch diff --git a/src/scrapers/imdb/ImdbApi.h b/src/scrapers/imdb/ImdbApi.h index 06383a0c5d..72200a901c 100644 --- a/src/scrapers/imdb/ImdbApi.h +++ b/src/scrapers/imdb/ImdbApi.h @@ -2,15 +2,11 @@ #include "data/ImdbId.h" #include "data/Locale.h" -#include "data/tv_show/EpisodeNumber.h" -#include "data/tv_show/SeasonNumber.h" -#include 
"data/tv_show/SeasonOrder.h" #include "network/NetworkManager.h" #include "scrapers/ScraperError.h" -#include "scrapers/ScraperInfos.h" +#include "utils/Meta.h" -#include -#include +#include #include #include #include @@ -19,7 +15,7 @@ namespace mediaelch { namespace scraper { -/// \brief API interface for TheTvDb +/// \brief API interface for IMDB using the GraphQL and Suggest APIs. class ImdbApi : public QObject { Q_OBJECT @@ -31,31 +27,23 @@ class ImdbApi : public QObject void initialize(); ELCH_NODISCARD bool isInitialized() const; -public: - /// \brief What detail page of a movie should be loaded. - enum class PageKind - { - Main, - Reference, - PlotSummary, - ReleaseInfo, - Keywords, - Episodes, - }; - public: using ApiCallback = std::function; - void sendGetRequest(const Locale& locale, const QUrl& url, ApiCallback callback); + /// \brief Search using the IMDB Suggest API (JSON, no auth). + void suggestSearch(const QString& query, ApiCallback callback); - void searchForMovie(const Locale& locale, const QString& query, bool includeAdult, ApiCallback callback); - void searchForShow(const Locale& locale, const QString& query, ApiCallback callback); + /// \brief Send a GraphQL query to graphql.imdb.com. + void sendGraphQLRequest(const QString& query, const QJsonObject& variables, ApiCallback callback); - void loadTitle(const Locale& locale, const ImdbId& movieId, PageKind page, ApiCallback callback); + /// \brief Load full title details via GraphQL. + void loadTitleViaGraphQL(const ImdbId& id, ApiCallback callback); - void loadDefaultEpisodesPage(const Locale& locale, const ImdbId& showId, ApiCallback callback); + /// \brief Load all episodes for a title via GraphQL. + void loadEpisodesViaGraphQL(const ImdbId& showId, int limit, ApiCallback callback); - void loadSeason(const Locale& locale, const ImdbId& showId, SeasonNumber season, ApiCallback callback); + /// \brief Load episodes for a specific season via GraphQL. 
+ void loadSeasonEpisodesViaGraphQL(const ImdbId& showId, int season, int limit, ApiCallback callback); signals: void initialized(); @@ -65,17 +53,10 @@ class ImdbApi : public QObject ELCH_NODISCARD static QUrl makeFullAssetUrl(const QString& suffix); private: - /// \brief Add necessary headers for IMDb to the request object. - void addHeadersToRequest(const Locale& locale, QNetworkRequest& request); - - ELCH_NODISCARD QUrl makeTitleUrl(const ImdbId& id, PageKind page) const; - ELCH_NODISCARD QUrl makeMovieSearchUrl(const QString& searchStr, bool includeAdult) const; - ELCH_NODISCARD QUrl makeShowSearchUrl(const QString& searchStr) const; - ELCH_NODISCARD QUrl makeSeasonUrl(const ImdbId& showId, SeasonNumber season) const; - ELCH_NODISCARD QUrl makeDefaultEpisodesUrl(const ImdbId& showId) const; + ELCH_NODISCARD static QUrl makeSuggestUrl(const QString& query); + ELCH_NODISCARD static QUrl makeGraphQLUrl(); private: - const QString m_language; mediaelch::network::NetworkManager m_network; }; diff --git a/src/scrapers/imdb/ImdbGraphQLQueries.h b/src/scrapers/imdb/ImdbGraphQLQueries.h new file mode 100644 index 0000000000..f04c2e6207 --- /dev/null +++ b/src/scrapers/imdb/ImdbGraphQLQueries.h @@ -0,0 +1,198 @@ +#pragma once + +#include + +namespace mediaelch { +namespace scraper { + +/// \brief GraphQL query strings for the IMDB GraphQL API (https://graphql.imdb.com/). +/// +/// These queries are comprehensive and request more fields than MediaElch currently +/// parses. This is intentional — the extra fields (budget, awards, etc.) are available +/// for future use without modifying the API layer. +namespace ImdbGraphQLQueries { + +/// \brief Full title details query for movies and TV shows. +/// Fetches all metadata fields in a single request. +/// Variables: $id (ID!) +inline const QString TITLE_DETAILS = QStringLiteral(R"( +query TitleDetails($id: ID!) 
{ + title(id: $id) { + id + titleText { text } + originalTitleText { text } + titleType { id text } + releaseDate { day month year } + runtime { seconds } + plot { plotText { plainText } } + plots(first: 10) { + edges { node { plotText { plainText } plotType } } + } + ratingsSummary { aggregateRating voteCount } + meterRanking { currentRank } + genres { genres { text id } } + keywords(first: 100) { + edges { node { text } } + } + certificate { rating } + certificates(first: 50) { + edges { node { rating country { id text } } } + } + akas(first: 50) { + edges { node { text country { id text } language { id text } } } + } + cast: credits(first: 250, filter: { categories: ["actor", "actress"] }) { + edges { + node { + name { id nameText { text } primaryImage { url } } + ... on Cast { characters { name } } + } + } + } + directors: credits(first: 50, filter: { categories: ["director"] }) { + edges { node { name { nameText { text } } } } + } + writers: credits(first: 50, filter: { categories: ["writer"] }) { + edges { node { name { nameText { text } } } } + } + taglines(first: 5) { + edges { node { text } } + } + countriesOfOrigin { countries { id text } } + companyCredits(first: 20, filter: { categories: ["production"] }) { + edges { node { company { companyText { text } } category { text } } } + } + primaryImage { url width height } + images(first: 10) { + edges { node { url width height caption { plainText } } } + } + primaryVideos(first: 1) { + edges { node { id name { value } runtime { value } } } + } + metacritic { metascore { score } } + releaseDates(first: 50) { + edges { node { day month year country { id text } } } + } + episodes { + isOngoing + seasons { number } + } + + # Future fields — included in query but not yet parsed by MediaElch + productionBudget { budget { amount currency } } + prestigiousAwardSummary { wins nominations award { text } } + technicalSpecifications { + aspectRatios { items { aspectRatio } } + soundMixes { items { text } } + colorations 
{ items { text } } + } + filmingLocations(first: 10) { + edges { node { text } } + total + } + moreLikeThisTitles(first: 10) { + edges { node { id titleText { text } } } + } + connections(first: 20) { + edges { + node { + associatedTitle { id titleText { text } } + category { text } + } + } + } + } +} +)"); + +/// \brief Episode listing for a title across all seasons (no season filter). +/// Variables: $id (ID!), $first (Int!) +inline const QString SEASON_EPISODES = QStringLiteral(R"( +query SeasonEpisodes($id: ID!, $first: Int!) { + title(id: $id) { + episodes { + episodes(first: $first) { + edges { + node { + id + titleText { text } + series { displayableEpisodeNumber { displayableSeason { text } episodeNumber { text } } } + plot { plotText { plainText } } + releaseDate { day month year } + ratingsSummary { aggregateRating voteCount } + runtime { seconds } + primaryImage { url width height } + certificate { rating } + certificates(first: 10) { + edges { node { rating country { id text } } } + } + directors: credits(first: 10, filter: { categories: ["director"] }) { + edges { node { name { nameText { text } } } } + } + writers: credits(first: 10, filter: { categories: ["writer"] }) { + edges { node { name { nameText { text } } } } + } + cast: credits(first: 50, filter: { categories: ["actor", "actress"] }) { + edges { + node { + name { id nameText { text } primaryImage { url } } + ... on Cast { characters { name } } + } + } + } + } + } + pageInfo { hasNextPage endCursor } + } + } + } +} +)"); + +/// \brief Episode listing filtered by season number. +/// Variables: $id (ID!), $first (Int!), $season (String!) +inline const QString SEASON_EPISODES_FILTERED = QStringLiteral(R"( +query SeasonEpisodesFiltered($id: ID!, $first: Int!, $season: String!) 
{ + title(id: $id) { + episodes { + episodes(first: $first, filter: { includeSeasons: [$season] }) { + edges { + node { + id + titleText { text } + series { displayableEpisodeNumber { displayableSeason { text } episodeNumber { text } } } + plot { plotText { plainText } } + releaseDate { day month year } + ratingsSummary { aggregateRating voteCount } + runtime { seconds } + primaryImage { url width height } + certificate { rating } + certificates(first: 10) { + edges { node { rating country { id text } } } + } + directors: credits(first: 10, filter: { categories: ["director"] }) { + edges { node { name { nameText { text } } } } + } + writers: credits(first: 10, filter: { categories: ["writer"] }) { + edges { node { name { nameText { text } } } } + } + cast: credits(first: 50, filter: { categories: ["actor", "actress"] }) { + edges { + node { + name { id nameText { text } primaryImage { url } } + ... on Cast { characters { name } } + } + } + } + } + } + pageInfo { hasNextPage endCursor } + } + } + } +} +)"); + +} // namespace ImdbGraphQLQueries +} // namespace scraper +} // namespace mediaelch diff --git a/src/scrapers/imdb/ImdbJsonParser.cpp b/src/scrapers/imdb/ImdbJsonParser.cpp index 61f36f69c1..4b875ae27d 100644 --- a/src/scrapers/imdb/ImdbJsonParser.cpp +++ b/src/scrapers/imdb/ImdbJsonParser.cpp @@ -4,290 +4,266 @@ #include "data/ImdbId.h" #include "data/Poster.h" #include "globals/Helper.h" -#include "scrapers/ScraperUtils.h" #include #include #include #include -namespace { - -// clang-format off -const QVector IMDB_JSON_PATH_ID = { "props", "pageProps", "mainColumnData", "id" }; -const QVector IMDB_JSON_PATH_TITLE = { "props", "pageProps", "mainColumnData", "titleText", "text" }; -const QVector IMDB_JSON_PATH_ORIGINAL_TITLE = { "props", "pageProps", "mainColumnData", "originalTitleText", "text" }; -const QVector IMDB_JSON_PATH_OVERVIEW = { "props", "pageProps", "mainColumnData", "summaries", "edges", "0", "node", "plotText", "plaidHtml" }; -const QVector 
IMDB_JSON_PATH_OUTLINE = { "props", "pageProps", "mainColumnData", "plot", "plotText", "plainText" }; -const QVector IMDB_JSON_PATH_RELEASE_DATE = { "props", "pageProps", "mainColumnData", "releaseDate" }; -const QVector IMDB_JSON_PATH_RUNTIME_SECONDS = { "props", "pageProps", "aboveTheFoldData", "runtime", "seconds" }; -const QVector IMDB_JSON_PATH_TOP250 = { "props", "pageProps", "mainColumnData", "ratingsSummary", "topRanking", "rank" }; -const QVector IMDB_JSON_PATH_RATING = { "props", "pageProps", "mainColumnData", "ratingsSummary", "aggregateRating" }; -const QVector IMDB_JSON_PATH_VOTE_COUNT = { "props", "pageProps", "mainColumnData", "ratingsSummary", "voteCount" }; -const QVector IMDB_JSON_PATH_METACRITIC = { "props", "pageProps", "mainColumnData", "metacritic", "metascore", "score" }; -const QVector IMDB_JSON_PATH_GENRES = { "props", "pageProps", "mainColumnData", "genres", "genres" }; -const QVector IMDB_JSON_PATH_TAGLINE = { "props", "pageProps", "mainColumnData", "taglines", "edges", "0", "node", "text" }; -const QVector IMDB_JSON_PATH_KEYWORDS = { "props", "pageProps", "mainColumnData", "storylineKeywords", "edges" }; -const QVector IMDB_JSON_PATH_CERTIFICATIONS = { "props", "pageProps", "mainColumnData", "certificates", "edges" }; -const QVector IMDB_JSON_PATH_STUDIOS = { "props", "pageProps", "mainColumnData", "production", "edges" }; -const QVector IMDB_JSON_PATH_STUDIO_NAME = { "node", "company", "companyText", "text" }; -const QVector IMDB_JSON_PATH_COUNTRIES = { "props", "pageProps", "mainColumnData", "countriesOfOrigin", "countries" }; -const QVector IMDB_JSON_PATH_POSTER_URL = { "props", "pageProps", "aboveTheFoldData", "primaryImage", "url" }; -// TODO: Select highest definition -const QVector IMDB_JSON_PATH_TRAILER_URL = { "props", "pageProps", "mainColumnData", "primaryVideos", "edges", "0", "node", "playbackURLs", "0", "url" }; - -// Cast / Actors / Directors -// TODO: Scrape more actors from reference page -const QVector 
IMDB_JSON_PATH_CREDIT_GROUPING = { "props", "pageProps", "mainColumnData", "creditGroupings", "edges" }; -const QVector IMDB_JSON_PATH_CAST_NAME = { "node", "name", "nameText", "text" }; -const QVector IMDB_JSON_PATH_CAST_URL = { "node", "name", "primaryImage", "url" }; -const QVector IMDB_JSON_PATH_CAST_ROLE = { "node", "creditedRoles", "edges", "0", "node", "text" }; - -// TV Shows -const QVector IMDB_JSON_PATH_SEASONS = { "props", "pageProps", "contentData", "entityMetadata",/*??*/ "data", "title", "episodes", "seasons" }; -const QVector IMDB_JSON_PATH_SEASON_EPISODES = { "props", "pageProps", "contentData", "section", "episodes", "items" }; - -// Plot-Summary page -const QVector IMDB_JSON_PATH_PLOTSUMMARY_SYNOPSIS = { "props", "pageProps", "contentData", "data", "title", "plotSynopsis", "edges", "0", "node", "plotText", "plaidHtml" }; - -// clang-format on - -} // namespace - namespace mediaelch { namespace scraper { -ImdbData ImdbJsonParser::parseFromReferencePage(const QString& html, const Locale& preferredLocale) +// ============================================================================= +// GraphQL-based parsing (new) +// ============================================================================= + +ImdbData ImdbJsonParser::parseFromGraphQL(const QString& json, const Locale& locale) { - // Note: Expects HTML from https://www.imdb.com/title/tt________/reference - QJsonDocument json = extractJsonFromHtml(html); + QJsonParseError parseError{}; + const QJsonDocument doc = QJsonDocument::fromJson(json.toUtf8(), &parseError); + if (parseError.error != QJsonParseError::NoError) { + return {}; + } - ImdbJsonParser parser{}; - parser.parseAndAssignDetails(json, preferredLocale); - parser.parseAndAssignDirectors(json); - parser.parseAndAssignWriters(json); - parser.parseAndStoreActors(json); + const QJsonObject title = doc.object().value("data").toObject().value("title").toObject(); + if (title.isEmpty()) { + return {}; + } + ImdbJsonParser parser; + 
parser.parseGraphQLTitle(title, locale); + parser.parseGraphQLCredits(title); + parser.parseGraphQLActors(title); return parser.m_data; } -Optional ImdbJsonParser::parseOverviewFromPlotSummaryPage(const QString& html) +void ImdbJsonParser::parseGraphQLTitle(const QJsonObject& title, const Locale& locale) { - // Note: Expects HTML from https://www.imdb.com/title/tt________/plotsummray - QJsonDocument json = extractJsonFromHtml(html); + using namespace std::chrono; - ImdbJsonParser parser{}; - parser.parseAndAssignOverviewFromPlotSummary(json); + // IMDB ID + const QString id = title.value("id").toString(); + if (!id.isEmpty()) { + m_data.imdbId = ImdbId(id); + } - return parser.m_data.overview; -} + // Title + Original Title + m_data.title = title.value("titleText").toObject().value("text").toString().trimmed(); + const QString origTitle = title.value("originalTitleText").toObject().value("text").toString().trimmed(); + if (!origTitle.isEmpty()) { + m_data.originalTitle = origTitle; + } -QVector ImdbJsonParser::parseSeasonNumbersFromEpisodesPage(const QString& html) -{ - QVector seasons; - QJsonObject json = extractJsonFromHtml(html).object(); - QJsonArray seasonsArray = followJsonPath(json, IMDB_JSON_PATH_SEASONS).toArray(); - for (const auto& season : seasonsArray) { - const int number = season.toObject().value("number").toInt(-1); - if (number > -1) { - seasons.append(number); + // Localized title from AKAs + if (locale.language() != "en") { + // Locale may be "de" (no country) or "de-DE". Derive country from language if needed. + const QString country = locale.hasCountry() ? 
locale.country().toUpper() : locale.language().toUpper(); + const QJsonArray akas = title.value("akas").toObject().value("edges").toArray(); + for (const auto& akaEntry : akas) { + const QJsonObject node = akaEntry.toObject().value("node").toObject(); + const QString akaCountry = node.value("country").toObject().value("id").toString(); + if (akaCountry == country) { + const QString localizedTitle = node.value("text").toString().trimmed(); + if (!localizedTitle.isEmpty()) { + m_data.localizedTitle = localizedTitle; + break; + } + } } } - return seasons; -} -QVector ImdbJsonParser::parseEpisodeIds(const QString& html) -{ - QVector episodes; - QJsonObject json = extractJsonFromHtml(html).object(); - QJsonArray episodesArray = followJsonPath(json, IMDB_JSON_PATH_SEASON_EPISODES).toArray(); - for (const auto& episodeValue : episodesArray) { - QJsonObject episodeObject = episodeValue.toObject(); - ImdbShortEpisodeData data; - { - bool ok{false}; - data.imdbId = episodeObject.value("id").toString(); - data.seasonNumber = episodeObject.value("season").toString().toInt(&ok); - if (!ok) { - continue; + // Plot / Overview — use the longest plot text + const QJsonArray plots = title.value("plots").toObject().value("edges").toArray(); + QString longestPlot; + QString shortestPlot; + for (const auto& plotEntry : plots) { + const QString plotText = + plotEntry.toObject().value("node").toObject().value("plotText").toObject().value("plainText").toString(); + if (!plotText.isEmpty()) { + if (plotText.length() > longestPlot.length()) { + longestPlot = plotText; } - } - { - bool ok{false}; - data.episodeNumber = episodeObject.value("episode").toString().toInt(&ok); - if (!ok) { - continue; + if (shortestPlot.isEmpty() || plotText.length() < shortestPlot.length()) { + shortestPlot = plotText; } } - episodes.append(data); } - return episodes; -} - -void ImdbJsonParser::parseAndAssignDetails(const QJsonDocument& json, const Locale& preferredLocale) -{ - using namespace std::chrono; - - 
QJsonValue value; - - value = followJsonPath(json, IMDB_JSON_PATH_ID); - if (value.isString()) { - QString id = value.toString(); - m_data.imdbId = ImdbId(id); + // Fallback to the single "plot" field if plots array is empty + if (longestPlot.isEmpty()) { + longestPlot = title.value("plot").toObject().value("plotText").toObject().value("plainText").toString(); } - - value = followJsonPath(json, IMDB_JSON_PATH_TITLE); - if (value.isString()) { - m_data.title = value.toString().trimmed(); + if (!longestPlot.isEmpty()) { + m_data.overview = longestPlot.trimmed(); } - - value = followJsonPath(json, IMDB_JSON_PATH_ORIGINAL_TITLE); - if (value.isString()) { - m_data.originalTitle = value.toString().trimmed(); - } - - value = followJsonPath(json, IMDB_JSON_PATH_OVERVIEW); - if (value.isString()) { - m_data.overview = removeHtmlEntities(value.toString().trimmed()); + if (!shortestPlot.isEmpty() && shortestPlot != longestPlot) { + m_data.outline = shortestPlot.trimmed(); + } else if (!longestPlot.isEmpty()) { + // Use first sentence as outline if no separate short plot + const qsizetype dotPos = longestPlot.indexOf(". 
"); + if (dotPos > 0 && dotPos < longestPlot.length() - 2) { + m_data.outline = longestPlot.left(dotPos + 1).trimmed(); + } } - value = followJsonPath(json, IMDB_JSON_PATH_OUTLINE); - if (value.isString()) { - m_data.outline = removeHtmlEntities(value.toString().trimmed()); + // Genres + const QJsonArray genres = title.value("genres").toObject().value("genres").toArray(); + for (const auto& genreObj : genres) { + const QString genre = genreObj.toObject().value("text").toString().trimmed(); + if (!genre.isEmpty()) { + m_data.genres.insert(genre); + } } - value = followJsonPath(json, IMDB_JSON_PATH_GENRES); - if (value.isArray()) { - for (const auto& genreObj : value.toArray()) { - QString genre = genreObj.toObject().value("text").toString().trimmed(); - if (!genre.isEmpty()) { - m_data.genres.insert(genre); - } + // Studios (production companies) + const QJsonArray companies = title.value("companyCredits").toObject().value("edges").toArray(); + for (const auto& companyEntry : companies) { + const QString studio = companyEntry.toObject() + .value("node") + .toObject() + .value("company") + .toObject() + .value("companyText") + .toObject() + .value("text") + .toString() + .trimmed(); + if (!studio.isEmpty()) { + m_data.studios.insert(helper::mapStudio(studio)); } } - value = followJsonPath(json, IMDB_JSON_PATH_STUDIOS); - if (value.isArray()) { - for (const auto& studioObj : value.toArray()) { - QString studio = followJsonPath(studioObj.toObject(), IMDB_JSON_PATH_STUDIO_NAME).toString().trimmed(); - if (!studio.isEmpty()) { - m_data.studios.insert(helper::mapStudio(studio)); - } + // Countries + const QJsonArray countries = title.value("countriesOfOrigin").toObject().value("countries").toArray(); + for (const auto& countryObj : countries) { + const QString country = countryObj.toObject().value("id").toString().trimmed(); + if (!country.isEmpty()) { + m_data.countries.insert(helper::mapCountry(country)); } } - value = followJsonPath(json, IMDB_JSON_PATH_COUNTRIES); - 
if (value.isArray()) { - for (const auto& countryObj : value.toArray()) { - QString country = countryObj.toObject().value("id").toString().trimmed(); - if (!country.isEmpty()) { - m_data.countries.insert(helper::mapCountry(country)); - } + // Tagline + const QJsonArray taglines = title.value("taglines").toObject().value("edges").toArray(); + if (!taglines.isEmpty()) { + const QString tagline = + taglines.at(0).toObject().value("node").toObject().value("text").toString().trimmed(); + if (!tagline.isEmpty()) { + m_data.tagline = tagline; } } - value = followJsonPath(json, IMDB_JSON_PATH_TAGLINE); - if (value.isString()) { - m_data.tagline = removeHtmlEntities(value.toString().trimmed()); + // Runtime + const int runtimeSeconds = title.value("runtime").toObject().value("seconds").toInt(-1); + if (runtimeSeconds > 0) { + m_data.runtime = minutes(qCeil(runtimeSeconds / 60.)); } - value = followJsonPath(json, IMDB_JSON_PATH_RUNTIME_SECONDS); - if (value.isDouble()) { - const int runtime = value.toInt(-1); - if (runtime > 0) { - m_data.runtime = minutes(qCeil(runtime / 60.)); + // Release date + const QJsonObject releaseDateObj = title.value("releaseDate").toObject(); + const int year = releaseDateObj.value("year").toInt(-1); + if (year > 0) { + const int month = releaseDateObj.value("month").toInt(1); + const int day = releaseDateObj.value("day").toInt(1); + QDate date(year, month, day); + if (date.isValid()) { + m_data.released = date; } } - value = followJsonPath(json, IMDB_JSON_PATH_RELEASE_DATE); - if (value.isObject()) { - QJsonObject releaseDateObj = value.toObject(); - int day = releaseDateObj.value("day").toInt(-1); - int month = releaseDateObj.value("month").toInt(-1); - int year = releaseDateObj.value("year").toInt(-1); - if (day > -1 && month > -1 && year > -1) { - QDate date(year, month, day); - if (date.isValid()) { - m_data.released = date; + // Localized release date (override if available) + if (locale.language() != "en") { + const QString country = 
locale.hasCountry() ? locale.country().toUpper() : locale.language().toUpper(); + const QJsonArray releaseDates = title.value("releaseDates").toObject().value("edges").toArray(); + for (const auto& rdEntry : releaseDates) { + const QJsonObject node = rdEntry.toObject().value("node").toObject(); + if (node.value("country").toObject().value("id").toString() == country) { + const int rdYear = node.value("year").toInt(-1); + const int rdMonth = node.value("month").toInt(1); + const int rdDay = node.value("day").toInt(1); + if (rdYear > 0) { + QDate localDate(rdYear, rdMonth, rdDay); + if (localDate.isValid()) { + m_data.released = localDate; + break; + } + } } } } - value = followJsonPath(json, IMDB_JSON_PATH_RATING); - if (value.isDouble()) { - const double avgRating = value.toDouble(); - const int voteCount = followJsonPath(json, IMDB_JSON_PATH_VOTE_COUNT).toInt(-1); - if (avgRating > 0 || voteCount > 0) { - Rating rating; - rating.rating = avgRating; - rating.voteCount = voteCount; - rating.source = "imdb"; - rating.maxRating = 10; - m_data.ratings.append(rating); - } + // Rating (IMDB) + const QJsonObject ratingsSummary = title.value("ratingsSummary").toObject(); + const double avgRating = ratingsSummary.value("aggregateRating").toDouble(0.0); + const int voteCount = ratingsSummary.value("voteCount").toInt(0); + if (avgRating > 0 || voteCount > 0) { + Rating rating; + rating.rating = avgRating; + rating.voteCount = voteCount; + rating.source = "imdb"; + rating.maxRating = 10; + m_data.ratings.append(rating); } - value = followJsonPath(json, IMDB_JSON_PATH_METACRITIC); - if (value.isDouble()) { - const int metascore = value.toInt(-1); - if (metascore > 0) { - Rating rating; - rating.rating = metascore; - rating.voteCount = 0; - rating.source = "metacritic"; - rating.maxRating = 100; - m_data.ratings.append(rating); - } + // Metacritic + const int metascore = + title.value("metacritic").toObject().value("metascore").toObject().value("score").toInt(-1); + if 
(metascore > 0) { + Rating rating; + rating.rating = metascore; + rating.voteCount = 0; + rating.source = "metacritic"; + rating.maxRating = 100; + m_data.ratings.append(rating); } - value = followJsonPath(json, IMDB_JSON_PATH_TOP250); - if (value.isDouble()) { - const double top250 = value.toInt(-1); - if (top250 > 0 && top250 <= 250) { - m_data.top250 = top250; - } - } + // Top250 is not available via IMDB's GraphQL API. The meterRanking field is + // STARmeter (popularity rank), which is a different metric. We leave top250 + // unset (defaults to -1 in Movie/TvShow). - value = followJsonPath(json, IMDB_JSON_PATH_KEYWORDS); - if (value.isArray()) { - for (const auto& keywordObj : value.toArray()) { - QString keyword = keywordObj.toObject().value("node").toObject().value("text").toString().trimmed(); - if (!keyword.isEmpty()) { - m_data.keywords.insert(keyword); - } + // Keywords + const QJsonArray keywords = title.value("keywords").toObject().value("edges").toArray(); + for (const auto& kwEntry : keywords) { + const QString keyword = kwEntry.toObject().value("node").toObject().value("text").toString().trimmed(); + if (!keyword.isEmpty()) { + m_data.keywords.insert(keyword); } } - value = followJsonPath(json, IMDB_JSON_PATH_CERTIFICATIONS); - if (value.isArray()) { - // TODO: Since IMDB only supports one locale at the moment, this has no real effect, yet! 
- Certification locale; - Certification us; - - for (const auto& certObj : value.toArray()) { - QJsonObject node = certObj.toObject().value("node").toObject(); - QString certificationCountry = node.value("country").toObject().value("id").toString().trimmed(); - QString certificationCode = node.value("rating").toString().trimmed(); - - const Certification certification = Certification(certificationCode); - if (certificationCountry == "US") { - us = certification; - } - if (certificationCountry == preferredLocale.country()) { - locale = certification; - } + // Certification — locale-specific, fallback to US + const QJsonArray certificates = title.value("certificates").toObject().value("edges").toArray(); + Certification localeCert; + Certification usCert; + const QString localeCountry = locale.hasCountry() ? locale.country().toUpper() : locale.language().toUpper(); + for (const auto& certEntry : certificates) { + const QJsonObject node = certEntry.toObject().value("node").toObject(); + const QString certCountry = node.value("country").toObject().value("id").toString(); + const QString certRating = node.value("rating").toString().trimmed(); + const Certification cert = Certification(certRating); + if (certCountry == "US") { + usCert = cert; + } + if (certCountry == localeCountry) { + localeCert = cert; } + } + if (localeCert.isValid()) { + m_data.localizedCertification = helper::mapCertification(localeCert); + m_data.certification = m_data.localizedCertification; + } else if (usCert.isValid()) { + m_data.certification = helper::mapCertification(usCert); + } - if (locale.isValid()) { - m_data.certification = helper::mapCertification(locale); - } else if (us.isValid()) { - m_data.certification = helper::mapCertification(us); + // Also check the simple "certificate" field as fallback + if (!m_data.certification.hasValue()) { + const QString simpleCert = title.value("certificate").toObject().value("rating").toString().trimmed(); + if (!simpleCert.isEmpty()) { + 
m_data.certification = helper::mapCertification(Certification(simpleCert)); } } - value = followJsonPath(json, IMDB_JSON_PATH_POSTER_URL); - if (value.isString()) { - const QUrl url(sanitizeAmazonMediaUrl(value.toString())); + // Poster + const QString posterUrl = title.value("primaryImage").toObject().value("url").toString(); + if (!posterUrl.isEmpty()) { + const QUrl url(sanitizeAmazonMediaUrl(posterUrl)); if (url.isValid()) { Poster p; p.thumbUrl = url; @@ -296,161 +272,290 @@ void ImdbJsonParser::parseAndAssignDetails(const QJsonDocument& json, const Loca } } - value = followJsonPath(json, IMDB_JSON_PATH_TRAILER_URL); - if (value.isString()) { - const QUrl url(value.toString()); - if (url.isValid()) { - m_data.trailer = url; + // Backdrops — from images list (excluding the primary poster image) + const QJsonArray imageEdges = title.value("images").toObject().value("edges").toArray(); + for (const QJsonValue& edge : imageEdges) { + const QJsonObject node = edge.toObject().value("node").toObject(); + const QString imgUrl = node.value("url").toString(); + if (!imgUrl.isEmpty() && imgUrl != posterUrl) { + const QUrl url(sanitizeAmazonMediaUrl(imgUrl)); + if (url.isValid()) { + Poster p; + p.thumbUrl = url; + p.originalUrl = url; + m_data.backdrops.append(p); + } } } -} - -QJsonDocument ImdbJsonParser::extractJsonFromHtml(const QString& html) -{ - QRegularExpression rx(R"re()re", - QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match = rx.match(html); - if (match.hasMatch()) { - return QJsonDocument::fromJson(match.captured(1).toUtf8()); + // Trailer — store IMDB video page URL (works in browser, not in Kodi) + const QJsonArray videos = title.value("primaryVideos").toObject().value("edges").toArray(); + if (!videos.isEmpty()) { + const QString videoId = videos.at(0).toObject().value("node").toObject().value("id").toString(); + if (!videoId.isEmpty()) { + m_data.trailer = 
QUrl(QStringLiteral("https://www.imdb.com/video/%1/").arg(videoId)); + } } - return QJsonDocument{}; -} -QJsonValue ImdbJsonParser::followJsonPath(const QJsonDocument& json, const QVector& paths) -{ - return followJsonPath(json.object(), paths); + // TV show specific: ongoing status + const QJsonObject episodes = title.value("episodes").toObject(); + if (!episodes.isEmpty()) { + m_data.isOngoing = episodes.value("isOngoing").toBool(false); + } } -QJsonValue ImdbJsonParser::followJsonPath(const QJsonObject& json, const QVector& paths) +void ImdbJsonParser::parseGraphQLCredits(const QJsonObject& title) { - QJsonValue next = json; - QJsonObject obj; - - for (const QString& path : paths) { - if (path == "0") { // special case for first entry of arrays - if (!next.isArray()) { - return QJsonValue::Null; - } - QJsonArray array = next.toArray(); - if (array.isEmpty()) { - return QJsonValue::Null; - } - next = array.at(0); + // Directors + const QJsonArray directors = title.value("directors").toObject().value("edges").toArray(); + for (const auto& dirEntry : directors) { + const QString name = + dirEntry.toObject().value("node").toObject().value("name").toObject().value("nameText").toObject().value( + "text").toString().trimmed(); + if (!name.isEmpty()) { + m_data.directors.insert(name); + } + } - } else { - if (!next.isObject()) { - return QJsonValue::Null; - } - obj = next.toObject(); - if (!obj.contains(path)) { - return QJsonValue::Null; - } - next = obj.value(path); + // Writers + const QJsonArray writers = title.value("writers").toObject().value("edges").toArray(); + for (const auto& writerEntry : writers) { + const QString name = writerEntry.toObject() + .value("node") + .toObject() + .value("name") + .toObject() + .value("nameText") + .toObject() + .value("text") + .toString() + .trimmed(); + if (!name.isEmpty()) { + m_data.writers.insert(name); } } - return next; } -void ImdbJsonParser::parseAndAssignDirectors(const QJsonDocument& json) +void 
ImdbJsonParser::parseGraphQLActors(const QJsonObject& title) { - QJsonValue groupings = followJsonPath(json, IMDB_JSON_PATH_CREDIT_GROUPING); - if (!groupings.isArray()) { - return; - } - - for (QJsonValue grouping : groupings.toArray()) { - QString groupingType = - grouping.toObject().value("node").toObject().value("grouping").toObject().value("text").toString(); - - if (groupingType != "Director" && groupingType != "Directors") { - // It seems the type depends on number of entries. + const QJsonArray cast = title.value("cast").toObject().value("edges").toArray(); + for (const auto& castEntry : cast) { + const QJsonObject node = castEntry.toObject().value("node").toObject(); + const QJsonObject nameObj = node.value("name").toObject(); + const QString name = nameObj.value("nameText").toObject().value("text").toString().trimmed(); + if (name.isEmpty()) { continue; } - QJsonArray directorsJson = - grouping.toObject().value("node").toObject().value("credits").toObject().value("edges").toArray(); - for (const auto& directorEntry : directorsJson) { - // TODO: We could/should also store images, etc. 
of directors and writers - const QJsonObject directorObj = directorEntry.toObject(); - const QString name = followJsonPath(directorObj, IMDB_JSON_PATH_CAST_NAME).toString().trimmed(); - if (!name.isEmpty()) { - m_data.directors.insert(name); - } + Actor actor; + actor.name = name; + actor.id = nameObj.value("id").toString(); + actor.thumb = sanitizeAmazonMediaUrl(nameObj.value("primaryImage").toObject().value("url").toString()); + + // Character name(s) + const QJsonArray characters = node.value("characters").toArray(); + if (!characters.isEmpty()) { + actor.role = characters.at(0).toObject().value("name").toString().trimmed(); } + + m_data.actors.append(actor); } } -void ImdbJsonParser::parseAndAssignWriters(const QJsonDocument& json) +QVector ImdbJsonParser::parseEpisodesFromGraphQL(const QString& json, const Locale& locale) { - QJsonValue groupings = followJsonPath(json, IMDB_JSON_PATH_CREDIT_GROUPING); - if (!groupings.isArray()) { - return; + QJsonParseError parseError{}; + const QJsonDocument doc = QJsonDocument::fromJson(json.toUtf8(), &parseError); + if (parseError.error != QJsonParseError::NoError) { + return {}; } + const QJsonArray episodes = doc.object() + .value("data") + .toObject() + .value("title") + .toObject() + .value("episodes") + .toObject() + .value("episodes") + .toObject() + .value("edges") + .toArray(); + + QVector result; + for (const auto& epEntry : episodes) { + const QJsonObject node = epEntry.toObject().value("node").toObject(); + ImdbEpisodeData ep; + + ep.imdbId = ImdbId(node.value("id").toString()); + + // Episode/season numbers (returned as text strings from displayableEpisodeNumber) + const QJsonObject den = node.value("series") + .toObject() + .value("displayableEpisodeNumber") + .toObject(); + bool seasonOk = false; + bool episodeOk = false; + ep.seasonNumber = den.value("displayableSeason").toObject().value("text").toString().toInt(&seasonOk); + ep.episodeNumber = 
den.value("episodeNumber").toObject().value("text").toString().toInt(&episodeOk); + if (!seasonOk) { + ep.seasonNumber = -1; + } + if (!episodeOk) { + ep.episodeNumber = -1; + } - for (QJsonValue grouping : groupings.toArray()) { - QString groupingType = - grouping.toObject().value("node").toObject().value("grouping").toObject().value("text").toString(); + // Title + const QString epTitle = node.value("titleText").toObject().value("text").toString().trimmed(); + if (!epTitle.isEmpty()) { + ep.title = epTitle; + } - if (groupingType != "Writer" && groupingType != "Writers") { - // It seems the type depends on number of entries. - continue; + // Plot + const QString plot = node.value("plot").toObject().value("plotText").toObject().value("plainText").toString(); + if (!plot.isEmpty()) { + ep.overview = plot.trimmed(); } - QJsonArray writersJson = - grouping.toObject().value("node").toObject().value("credits").toObject().value("edges").toArray(); - for (const auto& writerEntry : writersJson) { - // TODO: We could/should also store images, etc. 
of directors and writers - const QJsonObject writerObj = writerEntry.toObject(); - const QString name = followJsonPath(writerObj, IMDB_JSON_PATH_CAST_NAME).toString().trimmed(); - if (!name.isEmpty()) { - m_data.writers.insert(name); - } + // First aired + const QJsonObject rd = node.value("releaseDate").toObject(); + const int rdYear = rd.value("year").toInt(-1); + if (rdYear > 0) { + ep.firstAired = QDate(rdYear, rd.value("month").toInt(1), rd.value("day").toInt(1)); } - } -} -void ImdbJsonParser::parseAndStoreActors(const QJsonDocument& json) -{ - QJsonValue groupings = followJsonPath(json, IMDB_JSON_PATH_CREDIT_GROUPING); - if (!groupings.isArray()) { - return; - } + // Rating + const QJsonObject rs = node.value("ratingsSummary").toObject(); + const double rating = rs.value("aggregateRating").toDouble(0.0); + const int votes = rs.value("voteCount").toInt(0); + if (rating > 0 || votes > 0) { + Rating r; + r.rating = rating; + r.voteCount = votes; + r.source = "imdb"; + r.maxRating = 10; + ep.ratings.append(r); + } - for (QJsonValue grouping : groupings.toArray()) { - QString groupingType = - grouping.toObject().value("node").toObject().value("grouping").toObject().value("text").toString(); + // Runtime + const int rtSeconds = node.value("runtime").toObject().value("seconds").toInt(-1); + if (rtSeconds > 0) { + ep.runtime = std::chrono::minutes(qCeil(rtSeconds / 60.)); + } - if (groupingType != "Cast") { - continue; + // Thumbnail + const QString thumbUrl = node.value("primaryImage").toObject().value("url").toString(); + if (!thumbUrl.isEmpty()) { + Poster p; + p.thumbUrl = QUrl(sanitizeAmazonMediaUrl(thumbUrl)); + p.originalUrl = p.thumbUrl; + ep.thumbnail = p; + } + + // Certification — locale-specific, fallback to simple certificate + { + const QString localeCountry = locale.hasCountry() ? 
locale.country().toUpper() : locale.language().toUpper(); + Certification localeCert; + Certification usCert; + const QJsonArray certs = node.value("certificates").toObject().value("edges").toArray(); + for (const auto& certEntry : certs) { + const QJsonObject certNode = certEntry.toObject().value("node").toObject(); + const QString certCountry = certNode.value("country").toObject().value("id").toString(); + const Certification cert = Certification(certNode.value("rating").toString().trimmed()); + if (certCountry == "US") { + usCert = cert; + } + if (certCountry == localeCountry) { + localeCert = cert; + } + } + if (localeCert.isValid()) { + ep.certification = helper::mapCertification(localeCert); + } else if (usCert.isValid()) { + ep.certification = helper::mapCertification(usCert); + } else { + const QString simpleCert = node.value("certificate").toObject().value("rating").toString().trimmed(); + if (!simpleCert.isEmpty()) { + ep.certification = helper::mapCertification(Certification(simpleCert)); + } + } } - QJsonArray actorsJson = - grouping.toObject().value("node").toObject().value("credits").toObject().value("edges").toArray(); + // Directors + const QJsonArray dirs = node.value("directors").toObject().value("edges").toArray(); + for (const auto& d : dirs) { + const QString name = + d.toObject().value("node").toObject().value("name").toObject().value("nameText").toObject().value( + "text").toString().trimmed(); + if (!name.isEmpty()) { + ep.directors.insert(name); + } + } - for (const auto& actorEntry : actorsJson) { - const QJsonObject actorObj = actorEntry.toObject(); - const QString name = followJsonPath(actorObj, IMDB_JSON_PATH_CAST_NAME).toString().trimmed(); - const QString url = followJsonPath(actorObj, IMDB_JSON_PATH_CAST_URL).toString().trimmed(); - const QString role = followJsonPath(actorObj, IMDB_JSON_PATH_CAST_ROLE).toString().trimmed(); + // Writers + const QJsonArray wrs = node.value("writers").toObject().value("edges").toArray(); + for (const 
auto& w : wrs) { + const QString name = + w.toObject().value("node").toObject().value("name").toObject().value("nameText").toObject().value( + "text").toString().trimmed(); if (!name.isEmpty()) { + ep.writers.insert(name); + } + } + + // Actors + const QJsonArray castArr = node.value("cast").toObject().value("edges").toArray(); + for (const auto& c : castArr) { + const QJsonObject cNode = c.toObject().value("node").toObject(); + const QJsonObject nameObj = cNode.value("name").toObject(); + const QString actorName = nameObj.value("nameText").toObject().value("text").toString().trimmed(); + if (!actorName.isEmpty()) { Actor actor; - actor.name = name; - actor.role = role; - actor.thumb = sanitizeAmazonMediaUrl(url); - m_data.actors.append(actor); + actor.name = actorName; + actor.id = nameObj.value("id").toString(); + actor.thumb = sanitizeAmazonMediaUrl(nameObj.value("primaryImage").toObject().value("url").toString()); + const QJsonArray chars = cNode.value("characters").toArray(); + if (!chars.isEmpty()) { + actor.role = chars.at(0).toObject().value("name").toString().trimmed(); + } + ep.actors.append(actor); } } + + if (ep.imdbId.isValid()) { + result.append(ep); + } } + + return result; } -void ImdbJsonParser::parseAndAssignOverviewFromPlotSummary(const QJsonDocument& json) +QVector ImdbJsonParser::parseSeasonsFromGraphQL(const QString& json) { - const QJsonValue value = followJsonPath(json, IMDB_JSON_PATH_PLOTSUMMARY_SYNOPSIS); - if (value.isString()) { - m_data.overview = removeHtmlEntities(value.toString().trimmed()); + QJsonParseError parseError{}; + const QJsonDocument doc = QJsonDocument::fromJson(json.toUtf8(), &parseError); + if (parseError.error != QJsonParseError::NoError) { + return {}; + } + + const QJsonArray seasons = doc.object() + .value("data") + .toObject() + .value("title") + .toObject() + .value("episodes") + .toObject() + .value("seasons") + .toArray(); + + QVector result; + for (const auto& seasonEntry : seasons) { + const int num = 
seasonEntry.toObject().value("number").toInt(-1); + if (num >= 0) { + result.append(num); + } } + return result; } QString ImdbJsonParser::sanitizeAmazonMediaUrl(QString url) diff --git a/src/scrapers/imdb/ImdbJsonParser.h b/src/scrapers/imdb/ImdbJsonParser.h index 0787a8e6d0..46899f0a0f 100644 --- a/src/scrapers/imdb/ImdbJsonParser.h +++ b/src/scrapers/imdb/ImdbJsonParser.h @@ -8,7 +8,7 @@ #include "data/movie/MovieImages.h" #include -#include +#include #include #include #include @@ -33,6 +33,7 @@ class ImdbData Optional top250; Optional certification; Optional poster; + QVector backdrops; Optional trailer{}; QVector actors; @@ -42,47 +43,59 @@ class ImdbData QSet studios; QSet countries; QSet keywords; + + // Localization fields + Optional localizedTitle; + Optional localizedCertification; + + // TV show specific + Optional isOngoing; }; -struct ImdbShortEpisodeData +struct ImdbEpisodeData { - QString imdbId; - int seasonNumber; - int episodeNumber; + ImdbId imdbId; + int seasonNumber = -1; + int episodeNumber = -1; + Optional title; + Optional overview; + Optional firstAired; + Optional thumbnail; + QVector ratings; + Optional runtime; + Optional certification; + QSet directors; + QSet writers; + QVector actors; }; class ImdbJsonParser { public: - static ImdbData parseFromReferencePage(const QString& html, const mediaelch::Locale& preferredLocale); - static Optional parseOverviewFromPlotSummaryPage(const QString& html); - static QVector parseSeasonNumbersFromEpisodesPage(const QString& html); - static QVector parseEpisodeIds(const QString& html); + /// \brief Parse full title details from a GraphQL API response. + static ImdbData parseFromGraphQL(const QString& json, const mediaelch::Locale& locale); - ~ImdbJsonParser() = default; + /// \brief Parse episode list from a GraphQL episodes response. 
+ static QVector parseEpisodesFromGraphQL(const QString& json, const mediaelch::Locale& locale); -private: - ImdbJsonParser() = default; + /// \brief Parse season numbers from a GraphQL title details response. + static QVector parseSeasonsFromGraphQL(const QString& json); - void parseAndAssignDetails(const QJsonDocument& json, const mediaelch::Locale& preferredLocale); - void parseAndAssignDirectors(const QJsonDocument& json); - void parseAndStoreActors(const QJsonDocument& json); - void parseAndAssignWriters(const QJsonDocument& json); - /// \brief Parse and assign the plot/overview from IMDB's `/plotsummary` page. - /// \details IMDB's `/reference` page does not include a movie's plot, only an outline. - /// Hence, we use `/plotsummary` to get the full plot. - void parseAndAssignOverviewFromPlotSummary(const QJsonDocument& json); + ~ImdbJsonParser() = default; /// Sanitize the given URL. Return value is the same object as the input string. static QString sanitizeAmazonMediaUrl(QString url); - static QJsonDocument extractJsonFromHtml(const QString& html); - static QJsonValue followJsonPath(const QJsonDocument& json, const QVector& paths); - static QJsonValue followJsonPath(const QJsonObject& json, const QVector& paths); + +private: + ImdbJsonParser() = default; + + void parseGraphQLTitle(const QJsonObject& title, const mediaelch::Locale& locale); + void parseGraphQLCredits(const QJsonObject& title); + void parseGraphQLActors(const QJsonObject& title); private: ImdbData m_data{}; }; - } // namespace scraper } // namespace mediaelch diff --git a/src/scrapers/imdb/ImdbReferencePage.cpp b/src/scrapers/imdb/ImdbReferencePage.cpp deleted file mode 100644 index bd45ed0976..0000000000 --- a/src/scrapers/imdb/ImdbReferencePage.cpp +++ /dev/null @@ -1,300 +0,0 @@ -#include "ImdbReferencePage.h" - -#include "data/movie/Movie.h" -#include "globals/Helper.h" -#include "scrapers/ScraperUtils.h" - -#include -#include -#include - -namespace mediaelch { -namespace scraper { - 
-QString ImdbReferencePage::extractTitle(const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; - - rx.setPattern(R"(

\n([^<]+)\n([^\n]+)\n\s+([^<]+))"); - match = rx.match(html); - - if (match.hasMatch()) { - rx.setPattern(R"( \(.+\))"); - const QString dateStr = match.captured(1).remove(rx).trimmed(); - // Qt::RFC2822Date is basically "dd MMM yyyy" - return QDate::fromString(dateStr, Qt::RFC2822Date); - } - return {}; -} - -void ImdbReferencePage::extractStudios(Movie* movie, const QString& html) -{ - QRegularExpression rx(R"(Production Companies

.+
    (.+)
)", - QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - - const QRegularExpressionMatch match = rx.match(html); - - if (match.hasMatch()) { - QString listHtml = match.captured(1); - rx.setPattern(R"(([^<]+))"); - QRegularExpressionMatchIterator matches = rx.globalMatch(listHtml); - - while (matches.hasNext()) { - movie->addStudio(helper::mapStudio(removeHtmlEntities(matches.next().captured(1)).trimmed())); - } - } -} - -void ImdbReferencePage::extractDirectors(Movie* movie, const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; - - // Note: Either "Director" or "Directors", depending on their number. - rx.setPattern(R"re(Directors?:\s?\n\s+
    (.*)
)re"); - match = rx.match(html); - if (!match.hasMatch()) { - return; - } - - QString directorsBlock = match.captured(1); - QStringList directors; - - rx.setPattern(R"re(href="/name/[^"]+">([^<]+))re"); - QRegularExpressionMatchIterator matches = rx.globalMatch(directorsBlock); - - while (matches.hasNext()) { - directors << removeHtmlEntities(matches.next().captured(1)).trimmed(); - } - movie->setDirector(directors.join(", ")); -} - -void ImdbReferencePage::extractWriters(Movie* movie, const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; - - // Note: Either "Writer" or "Writers", depending on their number. - rx.setPattern(R"re(Writers?:\s?\n\s+
    (.*)
)re"); - match = rx.match(html); - if (!match.hasMatch()) { - return; - } - - QString writersBlock = match.captured(1); - QStringList writers; - - rx.setPattern(R"re(href="/name/[^"]+">([^<]+))re"); - QRegularExpressionMatchIterator matches = rx.globalMatch(writersBlock); - - while (matches.hasNext()) { - writers << removeHtmlEntities(matches.next().captured(1)).trimmed(); - } - movie->setWriter(writers.join(", ")); -} - -void ImdbReferencePage::extractCertification(Movie* movie, const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatchIterator matches; - - // TODO: There are also other countries, e.g. DE - rx.setPattern(R"rx(([^<]+))rx"); - matches = rx.globalMatch(html); - - QStringList certifications; - - while (matches.hasNext()) { - QRegularExpressionMatch match = matches.next(); - const QStringList cert = match.captured(1).split(":"); - if (cert.size() == 2) { - certifications << cert.at(1); - } - } - - if (!certifications.isEmpty()) { - // Some inside note: US has e.g. TV-G and PG. PG is listed last for some reason and I - // personally prefer it. 
- movie->setCertification(helper::mapCertification(Certification(certifications.last()))); - } -} - -void ImdbReferencePage::extractGenres(Movie* movie, const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; - - rx.setPattern(R"(Genres\n\s+(.+))"); - match = rx.match(html); - - if (match.hasMatch()) { - const QString genreHtmlList = match.captured(1); - rx.setPattern(R"(([^<]+))"); - QRegularExpressionMatchIterator matches = rx.globalMatch(genreHtmlList); - - while (matches.hasNext()) { - movie->addGenre(helper::mapGenre(matches.next().captured(1).trimmed())); - } - } -} - -void ImdbReferencePage::extractRating(Movie* movie, const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; - - Rating rating; - rating.source = "imdb"; - rating.maxRating = 10; - - rx.setPattern(R"re(([0-9.,]+))re"); - match = rx.match(html); - if (match.hasMatch()) { - rating.rating = match.captured(1).trimmed().replace(",", ".").toDouble(); - } - rx.setPattern(R"re(\(([0-9,.]+)\))re"); - match = rx.match(html); - if (match.hasMatch()) { - rating.voteCount = match.captured(1).trimmed().remove(",").remove(".").toInt(); - } - if (rating.rating > 0 || rating.voteCount > 0) { - movie->ratings().setOrAddRating(rating); - } - - // Top250 for movies - rx.setPattern("Top Rated Movies:? #([0-9]{1,3})"); - match = rx.match(html); - if (match.hasMatch()) { - movie->setTop250(match.captured(1).toInt()); - } - // Top250 for TV shows (used by TheTvDb) - rx.setPattern("Top Rated TV:? 
#([0-9]{1,3})\\n"); - match = rx.match(html); - if (match.hasMatch()) { - movie->setTop250(match.captured(1).toInt()); - } -} - -void ImdbReferencePage::extractOverview(Movie* movie, const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; - - // Outline -------------------------- - - rx.setPattern(R"(
\n\s+
(.+)
)"); - match = rx.match(html); - if (match.hasMatch()) { - const QString outline = match.captured(1).trimmed(); - if (!outline.isEmpty()) { - movie->setOutline(removeHtmlEntities(outline)); - } - } - - // Overview -------------------------- - - rx.setPattern(R"(Plot Summary\n\s+\n\s+

(.+)<)"); - match = rx.match(html); - if (match.hasMatch()) { - const QString overview = match.captured(1).trimmed(); - if (!overview.isEmpty()) { - movie->setOverview(removeHtmlEntities(overview)); - } - } -} - -void ImdbReferencePage::extractTaglines(Movie* movie, const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; - - rx.setPattern(R"(Taglines\n\s+(.*)setTagline(tagline); - } - } -} - -void ImdbReferencePage::extractTags(Movie* movie, const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; - - rx.setPattern(R"(Plot Keywords\n\s+(.*))"); - match = rx.match(html); - if (match.hasMatch()) { - const QString tagsHtml = match.captured(1); - rx.setPattern(R"(([^<]+))"); - QRegularExpressionMatchIterator tagMatches = rx.globalMatch(tagsHtml); - - while (tagMatches.hasNext()) { - const QString tag = tagMatches.next().captured(1).trimmed(); - if (!tag.isEmpty()) { - movie->addTag(tag); - } - } - } -} - -void ImdbReferencePage::extractCountries(Movie* movie, const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; - - rx.setPattern(R"(Country(.*))"); - match = rx.match(html); - if (match.hasMatch()) { - const QString content = match.captured(1); - rx.setPattern(R"(([^<]+))"); - QRegularExpressionMatchIterator countryMatches = rx.globalMatch(content); - while (countryMatches.hasNext()) { - movie->addCountry(helper::mapCountry(countryMatches.next().captured(1).trimmed())); - } - } -} - -} // namespace scraper -} // namespace mediaelch diff --git a/src/scrapers/imdb/ImdbReferencePage.h b/src/scrapers/imdb/ImdbReferencePage.h deleted file mode 
100644 index b882af41da..0000000000 --- a/src/scrapers/imdb/ImdbReferencePage.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#include -#include - -class Movie; - -namespace mediaelch { -namespace scraper { - -class ImdbReferencePage -{ -public: - /// Extract the release date from the given reference page. - /// If no release date can be extracted, an invalid QDate is returned. - static QDate extractReleaseDate(const QString& html); - - static QString extractTitle(const QString& html); - static QString extractOriginalTitle(const QString& html); - - static void extractStudios(Movie* movie, const QString& html); - static void extractDirectors(Movie* movie, const QString& html); - static void extractWriters(Movie* movie, const QString& html); - static void extractCertification(Movie* movie, const QString& html); - static void extractGenres(Movie* movie, const QString& html); - static void extractRating(Movie* movie, const QString& html); - static void extractOverview(Movie* movie, const QString& html); - static void extractTaglines(Movie* movie, const QString& html); - static void extractTags(Movie* movie, const QString& html); - static void extractCountries(Movie* movie, const QString& html); -}; - -} // namespace scraper -} // namespace mediaelch diff --git a/src/scrapers/imdb/ImdbSearchPage.cpp b/src/scrapers/imdb/ImdbSearchPage.cpp index 7061a5a27f..98c9a6df42 100644 --- a/src/scrapers/imdb/ImdbSearchPage.cpp +++ b/src/scrapers/imdb/ImdbSearchPage.cpp @@ -1,36 +1,56 @@ #include "ImdbSearchPage.h" -#include +#include "log/Log.h" -#include "scrapers/ScraperUtils.h" +#include +#include +#include namespace mediaelch { namespace scraper { -QVector ImdbSearchPage::parseSearch(const QString& html) +QVector ImdbSearchPage::parseSuggestResponse(const QString& json, + const QStringList& typeFilter) { - // Search result table from "https://www.imdb.com/search/title/?title=..." - // The results may contain the user's locale, e.g. `/de/title/…`. 
- static const QRegularExpression rx(R"( results; - QRegularExpressionMatchIterator matches = rx.globalMatch(html); - QRegularExpressionMatch match; - while (matches.hasNext()) { - match = matches.next(); - if (match.hasMatch()) { - QString title = normalizeFromHtml(match.captured(2)); - title.remove(listNo); - SearchResult result; - result.title = title; - result.identifier = match.captured(1); - result.released = QDate::fromString(match.captured(3), "yyyy"); - results.push_back(std::move(result)); + const QJsonArray items = doc.object().value("d").toArray(); + + for (const QJsonValue& item : items) { + const QJsonObject obj = item.toObject(); + const QString id = obj.value("id").toString(); + + // Only include title results (tt* IDs), skip name results (nm*) + if (!id.startsWith("tt")) { + continue; + } + + // Filter by type if requested + if (!typeFilter.isEmpty()) { + const QString type = obj.value("qid").toString(); + if (!typeFilter.contains(type, Qt::CaseInsensitive)) { + continue; + } + } + + SearchResult result; + result.identifier = id; + result.title = obj.value("l").toString(); + const int year = obj.value("y").toInt(0); + if (year > 0) { + result.released = QDate(year, 1, 1); } + results.push_back(std::move(result)); } return results; diff --git a/src/scrapers/imdb/ImdbSearchPage.h b/src/scrapers/imdb/ImdbSearchPage.h index 503042295b..f07d37a485 100644 --- a/src/scrapers/imdb/ImdbSearchPage.h +++ b/src/scrapers/imdb/ImdbSearchPage.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace mediaelch { @@ -23,7 +24,12 @@ class ImdbSearchPage }; public: - static QVector parseSearch(const QString& html); + /// \brief Parse search results from the IMDB Suggest API JSON response. + /// \param json The JSON response from v3.sg.media-imdb.com/suggestion/ + /// \param typeFilter List of IMDB title types (`qid` values) to include, e.g. + /// {"movie", "tvMovie"} for movies or {"tvSeries", "tvMiniSeries"} for TV. + /// Matching is case-insensitive. If empty, all types are included. 
+ static QVector parseSuggestResponse(const QString& json, const QStringList& typeFilter = {}); }; } // namespace scraper diff --git a/src/scrapers/movie/imdb/ImdbMovie.cpp b/src/scrapers/movie/imdb/ImdbMovie.cpp index 94bf37e584..001b6c656d 100644 --- a/src/scrapers/movie/imdb/ImdbMovie.cpp +++ b/src/scrapers/movie/imdb/ImdbMovie.cpp @@ -1,6 +1,5 @@ #include "scrapers/movie/imdb/ImdbMovie.h" -#include "scrapers/imdb/ImdbReferencePage.h" #include "scrapers/movie/imdb/ImdbMovieConfiguration.h" #include "scrapers/movie/imdb/ImdbMovieScrapeJob.h" #include "scrapers/movie/imdb/ImdbMovieSearchJob.h" @@ -36,7 +35,8 @@ ImdbMovie::ImdbMovie(ImdbMovieConfiguration& settings, QObject* parent) : MovieS MovieScraperInfo::Trailer, MovieScraperInfo::Countries, MovieScraperInfo::Actors, - MovieScraperInfo::Poster}; + MovieScraperInfo::Poster, + MovieScraperInfo::Backdrop}; m_meta.supportedLanguages = ImdbMovieConfiguration::supportedLanguages(); m_meta.defaultLocale = ImdbMovieConfiguration::defaultLocale(); m_meta.isAdult = false; diff --git a/src/scrapers/movie/imdb/ImdbMovieConfiguration.cpp b/src/scrapers/movie/imdb/ImdbMovieConfiguration.cpp index 535e62f97e..6d793eb20a 100644 --- a/src/scrapers/movie/imdb/ImdbMovieConfiguration.cpp +++ b/src/scrapers/movie/imdb/ImdbMovieConfiguration.cpp @@ -45,7 +45,26 @@ mediaelch::Locale ImdbMovieConfiguration::defaultLocale() QVector ImdbMovieConfiguration::supportedLanguages() { - return QVector({"en"}); + // With the GraphQL API migration, localization is supported via AKAs and + // country-specific certificates/release dates. Plots remain English-only. 
+ return QVector({ + "en", + "de", + "fr", + "es", + "it", + "pt", + "ja", + "ko", + "zh", + "ru", + "nl", + "pl", + "sv", + "da", + "fi", + "no", + }); } bool ImdbMovieConfiguration::shouldLoadAllTags() diff --git a/src/scrapers/movie/imdb/ImdbMovieScrapeJob.cpp b/src/scrapers/movie/imdb/ImdbMovieScrapeJob.cpp index c429617e61..d6d2825ddc 100644 --- a/src/scrapers/movie/imdb/ImdbMovieScrapeJob.cpp +++ b/src/scrapers/movie/imdb/ImdbMovieScrapeJob.cpp @@ -1,19 +1,11 @@ #include "scrapers/movie/imdb/ImdbMovieScrapeJob.h" -#include "globals/Helper.h" +#include "data/movie/Movie.h" #include "log/Log.h" -#include "network/NetworkRequest.h" #include "scrapers/imdb/ImdbApi.h" #include "scrapers/imdb/ImdbJsonParser.h" -#include "scrapers/imdb/ImdbReferencePage.h" -#include "scrapers/movie/imdb/ImdbMovie.h" - -#include - -#include "scrapers/ScraperUtils.h" #include "utils/Containers.h" - namespace mediaelch { namespace scraper { @@ -33,77 +25,40 @@ void ImdbMovieScrapeJob::doStart() m_movie->clear(config().details); m_movie->setImdbId(m_imdbId); - m_api.loadTitle(config().locale, m_imdbId, ImdbApi::PageKind::Reference, [this](QString html, ScraperError error) { + m_api.loadTitleViaGraphQL(m_imdbId, [this](QString data, ScraperError error) { if (error.hasError()) { setScraperError(error); emitFinished(); return; } - - parseAndAssignInfos(html); - - // How many pages do we have to download? Count them. Initial value '1' is the reference page itself. - m_itemsLeftToDownloads = 1; - - // IMDb has an extra page listing all tags (popular movies can have more than 100 tags). - if (m_loadAllTags) { - ++m_itemsLeftToDownloads; - loadTags(); - } - - if (config().details.contains(MovieScraperInfo::Overview)) { - // IMDb has a specific page for plot summaries, which we use for the movie's plot/overview. - // As this is an additional request, only do so if necessary. - ++m_itemsLeftToDownloads; - loadPlotSummary(); - } - - // It's possible that none of the above items should be loaded. 
- decreaseDownloadCount(); + parseAndAssignInfos(data); + emitFinished(); }); } -void ImdbMovieScrapeJob::loadTags() +void ImdbMovieScrapeJob::parseAndAssignInfos(const QString& json) { - const auto cb = [this](QString html, ScraperError error) { - if (!error.hasError()) { - parseAndAssignTags(html); - - } else { - setScraperError(error); - } - decreaseDownloadCount(); - }; - m_api.loadTitle(config().locale, m_imdbId, ImdbApi::PageKind::Keywords, cb); -} - -void ImdbMovieScrapeJob::loadPlotSummary() -{ - const auto cb = [this](QString html, ScraperError error) { - if (!error.hasError()) { - parseAndAssignOverviewFromPlotSummaryPage(html); - - } else { - setScraperError(error); - } - decreaseDownloadCount(); - }; - m_api.loadTitle(config().locale, m_imdbId, ImdbApi::PageKind::PlotSummary, cb); -} - -void ImdbMovieScrapeJob::parseAndAssignInfos(const QString& html) -{ - ImdbData data = ImdbJsonParser::parseFromReferencePage(html, config().locale); + ImdbData data = ImdbJsonParser::parseFromGraphQL(json, config().locale); if (data.imdbId.isValid()) { m_movie->setImdbId(data.imdbId); } - if (data.title.hasValue()) { + + // Title: use localized title if available, keep original as originalTitle + if (data.localizedTitle.hasValue()) { + m_movie->setTitle(data.localizedTitle.value); + if (data.originalTitle.hasValue()) { + m_movie->setOriginalTitle(data.originalTitle.value); + } else if (data.title.hasValue()) { + m_movie->setOriginalTitle(data.title.value); + } + } else if (data.title.hasValue()) { m_movie->setTitle(data.title.value); + if (data.originalTitle.hasValue()) { + m_movie->setOriginalTitle(data.originalTitle.value); + } } - if (data.originalTitle.hasValue()) { - m_movie->setOriginalTitle(data.originalTitle.value); - } + if (data.overview.hasValue()) { m_movie->setOverview(data.overview.value); } @@ -119,7 +74,7 @@ void ImdbMovieScrapeJob::parseAndAssignInfos(const QString& html) if (data.released.hasValue()) { m_movie->setReleased(data.released.value); } - for 
(Rating rating : data.ratings) { + for (const Rating& rating : data.ratings) { m_movie->ratings().addRating(rating); } @@ -131,10 +86,13 @@ void ImdbMovieScrapeJob::parseAndAssignInfos(const QString& html) if (data.poster.hasValue()) { m_movie->images().addPoster(data.poster.value); } + for (const Poster& backdrop : data.backdrops) { + m_movie->images().addBackdrop(backdrop); + } if (data.trailer.hasValue()) { m_movie->setTrailer(data.trailer.value); } - for (Actor actor : data.actors) { + for (const Actor& actor : data.actors) { m_movie->addActor(actor); } if (!data.directors.isEmpty()) { @@ -143,65 +101,33 @@ void ImdbMovieScrapeJob::parseAndAssignInfos(const QString& html) if (!data.writers.isEmpty()) { m_movie->setWriter(setToStringList(data.writers).join(", ")); } - for (QString genre : data.genres) { + for (const QString& genre : data.genres) { m_movie->addGenre(genre); } - for (QString studio : data.studios) { + for (const QString& studio : data.studios) { m_movie->addStudio(studio); } - for (QString country : data.countries) { + for (const QString& country : data.countries) { m_movie->addCountry(country); } - for (QString keyword : data.keywords) { - m_movie->addTag(keyword); - } -} - -void ImdbMovieScrapeJob::parseAndAssignTags(const QString& html) -{ - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::DotMatchesEverythingOption | QRegularExpression::InvertedGreedinessOption); - if (m_loadAllTags) { - rx.setPattern(R"(]+href="/search/(?:title/\?)keyword[^"]+"\n?>([^<]+))"); + if (!m_loadAllTags) { + // When "Load All Tags" is disabled, only add tags that are part of IMDB's + // default set (first ~20). The GraphQL query fetches up to 100 keywords. + // Since we can't distinguish "default" from "extended" keywords, we limit + // to the first 20 when the setting is off. 
+ int tagLimit = 20; + int tagCount = 0; + for (const QString& keyword : data.keywords) { + if (tagCount >= tagLimit) { + break; + } + m_movie->addTag(keyword); + ++tagCount; + } } else { - rx.setPattern(R"(]+href="/keyword/[^"]+"[^>]*>([^<]+))"); - } - - QRegularExpressionMatchIterator match = rx.globalMatch(html); - while (match.hasNext()) { - m_movie->addTag(match.next().captured(1).trimmed()); - } -} - -void ImdbMovieScrapeJob::parseAndAssignOverviewFromPlotSummaryPage(const QString& html) -{ - const Optional overview = ImdbJsonParser::parseOverviewFromPlotSummaryPage(html); - - if (overview.hasValue()) { - m_movie->setOverview(overview.value); - } -} - -QString ImdbMovieScrapeJob::sanitizeAmazonMediaUrl(QString url) -{ - // The URL can look like this: - // https://m.media-amazon.com/images/M/._V1_UY1400_CR90,0,630,1200_AL_.jpg - // To get the original image, everything after `._V` can be removed. - - if (!url.endsWith(".jpg")) { - return url; - } - QRegularExpression rx(R"re(._V([^/]+).jpg$)re", QRegularExpression::InvertedGreedinessOption); - url.replace(rx, ".jpg"); - - return url; -} - -void ImdbMovieScrapeJob::decreaseDownloadCount() -{ - --m_itemsLeftToDownloads; - if (m_itemsLeftToDownloads <= 0) { - emitFinished(); + for (const QString& keyword : data.keywords) { + m_movie->addTag(keyword); + } } } diff --git a/src/scrapers/movie/imdb/ImdbMovieScrapeJob.h b/src/scrapers/movie/imdb/ImdbMovieScrapeJob.h index 3b7b5256c7..a0722fe507 100644 --- a/src/scrapers/movie/imdb/ImdbMovieScrapeJob.h +++ b/src/scrapers/movie/imdb/ImdbMovieScrapeJob.h @@ -19,24 +19,12 @@ class ImdbMovieScrapeJob : public MovieScrapeJob void doStart() override; private: - void loadTags(); - void loadPlotSummary(); + void parseAndAssignInfos(const QString& json); - void parseAndAssignInfos(const QString& html); - void parseAndAssignTags(const QString& html); - void parseAndAssignOverviewFromPlotSummaryPage(const QString& html); - - static QString sanitizeAmazonMediaUrl(QString url); - - 
void decreaseDownloadCount(); - -private: // config +private: ImdbApi& m_api; ImdbId m_imdbId; bool m_loadAllTags = false; - -private: // initialized during scraping - int m_itemsLeftToDownloads = 0; }; } // namespace scraper diff --git a/src/scrapers/movie/imdb/ImdbMovieSearchJob.cpp b/src/scrapers/movie/imdb/ImdbMovieSearchJob.cpp index 025956a82d..9d64e068b2 100644 --- a/src/scrapers/movie/imdb/ImdbMovieSearchJob.cpp +++ b/src/scrapers/movie/imdb/ImdbMovieSearchJob.cpp @@ -1,13 +1,12 @@ #include "scrapers/movie/imdb/ImdbMovieSearchJob.h" -#include "scrapers/ScraperUtils.h" +#include "log/Log.h" #include "scrapers/imdb/ImdbApi.h" - -#include -#include - #include "scrapers/imdb/ImdbSearchPage.h" +#include +#include + namespace mediaelch { namespace scraper { @@ -29,52 +28,64 @@ void ImdbMovieSearchJob::searchViaImdbId() { MediaElch_Debug_Ensures(ImdbId::isValidFormat(config().query)); - m_api.loadTitle( - Locale("en"), ImdbId(config().query), ImdbApi::PageKind::Reference, [this](QString data, ScraperError error) { - if (error.hasError()) { - setScraperError(error); - } else { - parseIdFromMovieReferencePage(data); - } - emitFinished(); - }); + m_api.loadTitleViaGraphQL(ImdbId(config().query), [this](QString data, ScraperError error) { + if (error.hasError()) { + setScraperError(error); + } else { + parseGraphQLResult(data); + } + emitFinished(); + }); } void ImdbMovieSearchJob::searchViaQuery() { MediaElch_Debug_Ensures(!ImdbId::isValidFormat(config().query)); - m_api.searchForMovie(Locale("en"), config().query, config().includeAdult, [this](QString data, ScraperError error) { + m_api.suggestSearch(config().query, [this](QString data, ScraperError error) { if (error.hasError()) { setScraperError(error); } else { - parseSearch(data); + parseSuggestResults(data); } emitFinished(); }); } - -void ImdbMovieSearchJob::parseIdFromMovieReferencePage(const QString& html) +void ImdbMovieSearchJob::parseSuggestResults(const QString& json) { - MovieSearchJob::Result result; - 
result.identifier = MovieIdentifier(config().query); + // Movie types: movie, tvMovie, short, video, tvShort + const QStringList movieTypes{"movie", "tvMovie", "short", "video", "tvShort"}; + auto results = ImdbSearchPage::parseSuggestResponse(json, movieTypes); + for (const auto& result : results) { + m_results << MovieSearchJob::Result{result.title, result.released, MovieIdentifier{result.identifier}}; + } +} - QRegularExpression rx; - rx.setPatternOptions(QRegularExpression::InvertedGreedinessOption); - QRegularExpressionMatch match; +void ImdbMovieSearchJob::parseGraphQLResult(const QString& json) +{ + QJsonParseError parseError{}; + const QJsonDocument doc = QJsonDocument::fromJson(json.toUtf8(), &parseError); + if (parseError.error != QJsonParseError::NoError) { + qCWarning(generic) << "[ImdbMovieSearchJob] JSON parse error:" << parseError.errorString(); + return; + } - rx.setPattern(R"re("titleText":{"text":"([^"]+)","__typename":"TitleText")re"); - match = rx.match(html); - if (match.hasMatch()) { - result.title = match.captured(1).trimmed(); + const QJsonObject title = doc.object().value("data").toObject().value("title").toObject(); + if (title.isEmpty()) { + return; } - // For search results, we are only interested in the year, not the full release date. 
- rx.setPattern(R"re("releaseYear":{"year":(\d{4}))re"); - match = rx.match(html); - if (match.hasMatch()) { - result.released = QDate::fromString(match.captured(1), "yyyy"); + MovieSearchJob::Result result; + result.identifier = MovieIdentifier(config().query); + result.title = title.value("titleText").toObject().value("text").toString(); + + const QJsonObject releaseDate = title.value("releaseDate").toObject(); + const int year = releaseDate.value("year").toInt(0); + if (year > 0) { + const int month = releaseDate.value("month").toInt(1); + const int day = releaseDate.value("day").toInt(1); + result.released = QDate(year, month, day); } if (!result.title.isEmpty()) { @@ -82,13 +93,5 @@ void ImdbMovieSearchJob::parseIdFromMovieReferencePage(const QString& html) } } -void ImdbMovieSearchJob::parseSearch(const QString& html) -{ - auto results = ImdbSearchPage::parseSearch(html); - for (const auto& result : results) { - m_results << MovieSearchJob::Result{result.title, result.released, MovieIdentifier{result.identifier}}; - } -} - } // namespace scraper } // namespace mediaelch diff --git a/src/scrapers/movie/imdb/ImdbMovieSearchJob.h b/src/scrapers/movie/imdb/ImdbMovieSearchJob.h index f8357c51b2..c593729067 100644 --- a/src/scrapers/movie/imdb/ImdbMovieSearchJob.h +++ b/src/scrapers/movie/imdb/ImdbMovieSearchJob.h @@ -21,8 +21,8 @@ class ImdbMovieSearchJob : public MovieSearchJob void searchViaImdbId(); void searchViaQuery(); - void parseSearch(const QString& html); - void parseIdFromMovieReferencePage(const QString& html); + void parseSuggestResults(const QString& json); + void parseGraphQLResult(const QString& json); private: ImdbApi& m_api; diff --git a/src/scrapers/tv_show/imdb/CMakeLists.txt b/src/scrapers/tv_show/imdb/CMakeLists.txt index 1db602ed1d..fafb493c63 100644 --- a/src/scrapers/tv_show/imdb/CMakeLists.txt +++ b/src/scrapers/tv_show/imdb/CMakeLists.txt @@ -1,13 +1,10 @@ add_library( mediaelch_scraper_tv_imdb OBJECT ImdbTv.cpp - ImdbTvEpisodeParser.cpp 
ImdbTvEpisodeScrapeJob.cpp ImdbTvSeasonScrapeJob.cpp - ImdbTvSeasonParser.cpp ImdbTvShowScrapeJob.cpp ImdbTvShowSearchJob.cpp - ImdbTvShowParser.cpp ImdbTvConfiguration.cpp ) diff --git a/src/scrapers/tv_show/imdb/ImdbTv.cpp b/src/scrapers/tv_show/imdb/ImdbTv.cpp index 86fe72c327..27fa722c6a 100644 --- a/src/scrapers/tv_show/imdb/ImdbTv.cpp +++ b/src/scrapers/tv_show/imdb/ImdbTv.cpp @@ -33,7 +33,8 @@ ImdbTv::ImdbTv(ImdbTvConfiguration& settings, QObject* parent) : TvScraper(paren ShowScraperInfo::Tags, ShowScraperInfo::Runtime, ShowScraperInfo::FirstAired, - ShowScraperInfo::Poster}; + ShowScraperInfo::Poster, + ShowScraperInfo::Fanart}; m_meta.supportedEpisodeDetails = {EpisodeScraperInfo::Title, EpisodeScraperInfo::Actors, EpisodeScraperInfo::Overview, diff --git a/src/scrapers/tv_show/imdb/ImdbTvConfiguration.cpp b/src/scrapers/tv_show/imdb/ImdbTvConfiguration.cpp index 1692441b46..b94c510263 100644 --- a/src/scrapers/tv_show/imdb/ImdbTvConfiguration.cpp +++ b/src/scrapers/tv_show/imdb/ImdbTvConfiguration.cpp @@ -36,12 +36,31 @@ void ImdbTvConfiguration::setLanguage(const Locale& value) mediaelch::Locale ImdbTvConfiguration::defaultLocale() { - return mediaelch::Locale::NoLocale; + return mediaelch::Locale{"en"}; } QVector ImdbTvConfiguration::supportedLanguages() { - return QVector({Locale::NoLocale}); + // With the GraphQL API migration, localization is supported via AKAs and + // country-specific certificates. Plots remain English-only. 
+ return QVector({ + "en", + "de", + "fr", + "es", + "it", + "pt", + "ja", + "ko", + "zh", + "ru", + "nl", + "pl", + "sv", + "da", + "fi", + "no", + }); } diff --git a/src/scrapers/tv_show/imdb/ImdbTvEpisodeParser.cpp b/src/scrapers/tv_show/imdb/ImdbTvEpisodeParser.cpp deleted file mode 100644 index 62df20aeed..0000000000 --- a/src/scrapers/tv_show/imdb/ImdbTvEpisodeParser.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "scrapers/tv_show/imdb/ImdbTvEpisodeParser.h" - -#include "data/Poster.h" -#include "data/TvDbId.h" -#include "data/tv_show/TvShowEpisode.h" -#include "globals/Helper.h" -#include "scrapers/ScraperUtils.h" -#include "scrapers/imdb/ImdbReferencePage.h" - -#include "scrapers/imdb/ImdbJsonParser.h" - -#include -#include - -#include "utils/Containers.h" - -namespace mediaelch { -namespace scraper { - -void ImdbTvEpisodeParser::parseInfos(TvShowEpisode& episode, const QString& html, const Locale& preferredLocale) -{ - // Note: Expects HTML from https://www.imdb.com/title/tt________/reference - using namespace std::chrono; - - ImdbData data = ImdbJsonParser::parseFromReferencePage(html, preferredLocale); - - if (data.imdbId.isValid()) { - episode.setImdbId(data.imdbId); - } - if (data.title.hasValue()) { - episode.setTitle(data.title.value); - } - // Enable once original titles exist for episodes. - // if (data.originalTitle.hasValue()) { - // episode.setOriginalTitle(data.originalTitle.value); - // } - - if (data.outline.hasValue()) { - // TODO: We use the outline for the overview; at the moment, we don't distinguish them in TV episodes. 
- episode.setOverview(data.outline.value); - } else if (data.overview.hasValue()) { - episode.setOverview(data.overview.value); - } - - if (data.released.hasValue()) { - episode.setFirstAired(data.released.value); - } - for (Rating rating : data.ratings) { - episode.ratings().addRating(rating); - } - if (data.top250.hasValue()) { - episode.setTop250(data.top250.value); - } - if (data.certification.hasValue()) { - episode.setCertification(data.certification.value); - } - for (const Actor& actor : data.actors) { - episode.addActor(actor); - } - if (!data.directors.isEmpty()) { - episode.setDirectors(setToStringList(data.directors)); - } - if (!data.writers.isEmpty()) { - episode.setWriters(setToStringList(data.writers)); - } - for (const QString& keyword : data.keywords) { - episode.addTag(keyword); - } - if (data.poster.hasValue()) { - episode.setThumbnail(data.poster.value.originalUrl); - } - // TODO - // - genres - // - setNetwork - // TODO if supported by episode class - // - runtime - // - keywords - // - tagline -} - -void ImdbTvEpisodeParser::parseIdFromSeason(TvShowEpisode& episode, const QString& html) -{ - // e.g. 
from https://www.imdb.com/title/tt0096697/episodes?season=4 - // Example JSON: - // ```json - // {"id":"tt0096697","type":"tvEpisode","season":"2","episode":"0"…} - // ``` - QRegularExpression regex(QStringLiteral(R"re("id":"(tt\d+)","type":"tvEpisode","season":"\d+","episode":"%1")re") - .arg(episode.episodeNumber().toString()), - QRegularExpression::InvertedGreedinessOption | QRegularExpression::DotMatchesEverythingOption); - QRegularExpressionMatch match = regex.match(html); - if (!match.hasMatch()) { - return; - } - - ImdbId imdbId(match.captured(1).trimmed()); - if (imdbId.isValid()) { - episode.setImdbId(imdbId); - } -} - -} // namespace scraper -} // namespace mediaelch diff --git a/src/scrapers/tv_show/imdb/ImdbTvEpisodeParser.h b/src/scrapers/tv_show/imdb/ImdbTvEpisodeParser.h deleted file mode 100644 index 3a4d4ecb42..0000000000 --- a/src/scrapers/tv_show/imdb/ImdbTvEpisodeParser.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include "data/ImdbId.h" -#include "data/Locale.h" - -#include - -class TvShowEpisode; - -namespace mediaelch { -namespace scraper { - -class ImdbTvEpisodeParser -{ -public: - /// \brief Parse the given HTML string and assign the details to the given episode. - /// \param episode Where to store the episode details into. - /// \param html HTML string from imdb.com - /// \param preferredLocale Use this locale if values exist in multiple languages. - static void parseInfos(TvShowEpisode& episode, const QString& html, const Locale& preferredLocale); - /// \brief Parses the IMDb id from the IMDb season HTML code for the given episode - /// by using its season/episode number. - /// \param episode Where to store the episode ID into. 
- /// \param html Season HTML string from imdb.com - static void parseIdFromSeason(TvShowEpisode& episode, const QString& html); -}; - -} // namespace scraper -} // namespace mediaelch diff --git a/src/scrapers/tv_show/imdb/ImdbTvEpisodeScrapeJob.cpp b/src/scrapers/tv_show/imdb/ImdbTvEpisodeScrapeJob.cpp index 9d1c0e4b71..cc0c925586 100644 --- a/src/scrapers/tv_show/imdb/ImdbTvEpisodeScrapeJob.cpp +++ b/src/scrapers/tv_show/imdb/ImdbTvEpisodeScrapeJob.cpp @@ -3,7 +3,8 @@ #include "data/tv_show/TvShowEpisode.h" #include "log/Log.h" #include "scrapers/imdb/ImdbApi.h" -#include "scrapers/tv_show/imdb/ImdbTvEpisodeParser.h" +#include "scrapers/imdb/ImdbJsonParser.h" +#include "utils/Containers.h" #include @@ -20,13 +21,13 @@ void ImdbTvEpisodeScrapeJob::doStart() if (config().identifier.hasEpisodeIdentifier()) { loadEpisode(ImdbId(config().identifier.episodeIdentifier)); } else { - loadSeason(); + loadFromSeason(); } } -void ImdbTvEpisodeScrapeJob::loadSeason() +void ImdbTvEpisodeScrapeJob::loadFromSeason() { - qCDebug(generic) << "[ImdbTvEpisodeScrapeJob] Have to load season first."; + qCDebug(generic) << "[ImdbTvEpisodeScrapeJob] Loading episode via season bulk query."; ImdbId showId(config().identifier.showIdentifier); @@ -40,32 +41,38 @@ void ImdbTvEpisodeScrapeJob::loadSeason() return; } - // The episode parser requires season/episode to be set when - // calling parseIdFromSeason() episode().setSeason(config().identifier.seasonNumber); episode().setEpisode(config().identifier.episodeNumber); - m_api.loadSeason( - config().locale, showId, config().identifier.seasonNumber, [this, showId](QString html, ScraperError error) { - if (error.hasError()) { - setScraperError(error); - emitFinished(); + // Load episodes for the specific season via GraphQL and find the one we need + m_api.loadSeasonEpisodesViaGraphQL( + showId, config().identifier.seasonNumber.toInt(), 250, [this](QString data, ScraperError error) { + if (error.hasError()) { + setScraperError(error); + 
emitFinished(); + return; + } + + const QVector episodes = ImdbJsonParser::parseEpisodesFromGraphQL(data, config().locale); + const int targetSeason = config().identifier.seasonNumber.toInt(); + const int targetEpisode = config().identifier.episodeNumber.toInt(); + + for (const ImdbEpisodeData& epData : episodes) { + if (epData.seasonNumber == targetSeason && epData.episodeNumber == targetEpisode) { + // Found our episode — load its full details via individual GraphQL query + loadEpisode(epData.imdbId); return; } - ImdbTvEpisodeParser::parseIdFromSeason(episode(), html); - if (!episode().imdbId().isValid()) { - qCWarning(generic) << "[ImdbTvEpisodeScrapeJob] Could not parse IMDb ID for episode from season page! " - << episode().seasonNumber() << episode().episodeNumber(); - ScraperError configError; - configError.error = ScraperError::Type::ConfigError; - configError.message = - tr("IMDb ID could not be loaded from season page! Cannot load requested episode."); - setScraperError(configError); - emitFinished(); - } else { - loadEpisode(episode().imdbId()); - } - }); + } + + qCWarning(generic) << "[ImdbTvEpisodeScrapeJob] Could not find episode S" << targetSeason << "E" + << targetEpisode << "in GraphQL response"; + ScraperError notFoundError; + notFoundError.error = ScraperError::Type::ConfigError; + notFoundError.message = tr("Episode not found in season listing."); + setScraperError(notFoundError); + emitFinished(); + }); } void ImdbTvEpisodeScrapeJob::loadEpisode(const ImdbId& episodeId) @@ -81,21 +88,57 @@ void ImdbTvEpisodeScrapeJob::loadEpisode(const ImdbId& episodeId) } qCInfo(generic) << "[ImdbTvEpisodeScrapeJob] Loading episode with IMDb ID" << episodeId.toString(); - m_api.loadTitle(config().locale, episodeId, ImdbApi::PageKind::Reference, [this](QString html, ScraperError error) { + m_api.loadTitleViaGraphQL(episodeId, [this](QString data, ScraperError error) { if (error.hasError()) { setScraperError(error); - } else if (html.isEmpty()) { - 
qCWarning(generic) << "[ImdbTvEpisodeScrapeJob] Empty episode HTML!"; + } else if (data.isEmpty()) { + qCWarning(generic) << "[ImdbTvEpisodeScrapeJob] Empty GraphQL response!"; ScraperError networkError; networkError.error = ScraperError::Type::NetworkError; networkError.message = tr("Loaded IMDb content is empty. Cannot load requested episode."); setScraperError(networkError); } else { - ImdbTvEpisodeParser::parseInfos(episode(), html, config().locale); + parseAndAssignInfos(data); } emitFinished(); }); } +void ImdbTvEpisodeScrapeJob::parseAndAssignInfos(const QString& json) +{ + ImdbData data = ImdbJsonParser::parseFromGraphQL(json, config().locale); + + if (data.imdbId.isValid()) { + episode().setImdbId(data.imdbId); + } + if (data.title.hasValue()) { + episode().setTitle(data.title.value); + } + if (data.overview.hasValue()) { + episode().setOverview(data.overview.value); + } + if (data.released.hasValue()) { + episode().setFirstAired(data.released.value); + } + for (const Rating& rating : data.ratings) { + episode().ratings().addRating(rating); + } + if (data.certification.hasValue()) { + episode().setCertification(data.certification.value); + } + if (data.poster.hasValue()) { + episode().setThumbnail(data.poster.value.thumbUrl); + } + if (!data.directors.isEmpty()) { + episode().setDirectors(setToStringList(data.directors)); + } + if (!data.writers.isEmpty()) { + episode().setWriters(setToStringList(data.writers)); + } + for (const Actor& actor : data.actors) { + episode().addActor(actor); + } +} + } // namespace scraper } // namespace mediaelch diff --git a/src/scrapers/tv_show/imdb/ImdbTvEpisodeScrapeJob.h b/src/scrapers/tv_show/imdb/ImdbTvEpisodeScrapeJob.h index 23e46bf5cf..021170e231 100644 --- a/src/scrapers/tv_show/imdb/ImdbTvEpisodeScrapeJob.h +++ b/src/scrapers/tv_show/imdb/ImdbTvEpisodeScrapeJob.h @@ -17,8 +17,9 @@ class ImdbTvEpisodeScrapeJob : public EpisodeScrapeJob void doStart() override; private: - void loadSeason(); void loadEpisode(const 
ImdbId& episodeId); + void loadFromSeason(); + void parseAndAssignInfos(const QString& json); private: ImdbApi& m_api; diff --git a/src/scrapers/tv_show/imdb/ImdbTvSeasonParser.cpp b/src/scrapers/tv_show/imdb/ImdbTvSeasonParser.cpp deleted file mode 100644 index 433cb60e8a..0000000000 --- a/src/scrapers/tv_show/imdb/ImdbTvSeasonParser.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "scrapers/tv_show/imdb/ImdbTvSeasonParser.h" - -#include "data/tv_show/TvShowEpisode.h" -#include "globals/Helper.h" -#include "scrapers/imdb/ImdbJsonParser.h" -#include "scrapers/tv_show/imdb/ImdbTvEpisodeParser.h" - -#include -#include -#include - -namespace mediaelch { -namespace scraper { - -QSet ImdbTvSeasonParser::parseSeasonNumbersFromEpisodesPage(const QString& html) -{ - QVector seasonList = ImdbJsonParser::parseSeasonNumbersFromEpisodesPage(html); - - QSet seasons; - for (int season : seasonList) { - seasons << SeasonNumber(season); - } - - return seasons; -} - -QMap ImdbTvSeasonParser::parseEpisodeIds(const QString& html, int forSeason) -{ - QVector episodesList = ImdbJsonParser::parseEpisodeIds(html); - - QMap ids; - for (const ImdbShortEpisodeData& entry : episodesList) { - if (entry.seasonNumber == forSeason) { - EpisodeNumber episode(entry.episodeNumber); - ids.insert(episode, ImdbId(entry.imdbId)); - } - } - - return ids; -} - -} // namespace scraper -} // namespace mediaelch diff --git a/src/scrapers/tv_show/imdb/ImdbTvSeasonParser.h b/src/scrapers/tv_show/imdb/ImdbTvSeasonParser.h deleted file mode 100644 index 1c9dc1eabe..0000000000 --- a/src/scrapers/tv_show/imdb/ImdbTvSeasonParser.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include "data/ImdbId.h" -#include "data/tv_show/EpisodeNumber.h" -#include "data/tv_show/SeasonNumber.h" - -#include -#include -#include -#include - -class TvShowEpisode; - -namespace mediaelch { -namespace scraper { - -class ImdbApi; - -class ImdbTvSeasonParser -{ -public: - ImdbTvSeasonParser() = default; - - /// \brief Returns a list of 
available seasons which is parsed from the - /// episode overview page of a TV show. - /// \param html HTML from https://www.imdb.com/title/tt/episodes - static QSet parseSeasonNumbersFromEpisodesPage(const QString& html); - - /// \brief Parses episode IDs from the HTML. - /// \param html IMDb website HTML for a season page. - /// \param forSeason Only parse episode IDs for this season. - static QMap parseEpisodeIds(const QString& html, int forSeason); -}; - -} // namespace scraper -} // namespace mediaelch diff --git a/src/scrapers/tv_show/imdb/ImdbTvSeasonScrapeJob.cpp b/src/scrapers/tv_show/imdb/ImdbTvSeasonScrapeJob.cpp index 2fe199103e..e3e9acbce2 100644 --- a/src/scrapers/tv_show/imdb/ImdbTvSeasonScrapeJob.cpp +++ b/src/scrapers/tv_show/imdb/ImdbTvSeasonScrapeJob.cpp @@ -3,9 +3,9 @@ #include "data/tv_show/TvShowEpisode.h" #include "log/Log.h" #include "scrapers/imdb/ImdbApi.h" -#include "scrapers/tv_show/imdb/ImdbTvSeasonParser.h" +#include "scrapers/imdb/ImdbJsonParser.h" +#include "utils/Containers.h" -#include #include namespace mediaelch { @@ -28,97 +28,75 @@ void ImdbTvSeasonScrapeJob::doStart() return; } - if (config().shouldLoadAllSeasons()) { - loadAllSeasons(); - - } else { - gatherAndLoadEpisodes(config().seasons.values(), {}); - } -} - -void ImdbTvSeasonScrapeJob::loadEpisodes(QMap> episodeIds) -{ - if (episodeIds.isEmpty()) { - emitFinished(); - return; - } - - // Get next episode to load and remove it from episodeIds - const SeasonNumber nextSeason = episodeIds.keys().first(); - - // If there is no episode left in that season then remove it. 
- if (episodeIds[nextSeason].isEmpty()) { - episodeIds.remove(nextSeason); - loadEpisodes(episodeIds); - return; - } - - QMap episodes = episodeIds[nextSeason]; - const EpisodeNumber nextEpisode = episodes.keys().first(); - const ImdbId nextEpisodeId = episodes[nextEpisode]; - episodeIds[nextSeason].remove(nextEpisode); - - // Create episode: We need to set some details because not everything is available - // from the single episode page (or can be scraped in a stable manner). - auto* episode = new TvShowEpisode({}, this); - episode->setSeason(nextSeason); - episode->setEpisode(nextEpisode); - episode->setImdbId(nextEpisodeId); - - qCInfo(generic) << "[ImdbTvSeasonScrapeJob] Start loading season" << nextSeason.toInt() << "episode" - << nextEpisode.toInt() << "of show" << config().showIdentifier.str(); - - m_api.loadTitle(config().locale, - nextEpisodeId, - ImdbApi::PageKind::Reference, - [this, episode, episodeIds](QString html, ScraperError error) { - if (error.hasError()) { - // only store error but try to load other episodes - setScraperError(error); - } else if (!html.isEmpty()) { - ImdbTvEpisodeParser::parseInfos(*episode, html, config().locale); - storeEpisode(episode); - } - loadEpisodes(episodeIds); - }); + loadEpisodes(); } -void ImdbTvSeasonScrapeJob::gatherAndLoadEpisodes(QList seasonsToLoad, - QMap> episodeIds) +void ImdbTvSeasonScrapeJob::loadEpisodes() { - if (seasonsToLoad.isEmpty()) { - loadEpisodes(episodeIds); - return; - } - - const SeasonNumber nextSeason = seasonsToLoad.takeFirst(); - const ImdbApi::ApiCallback callback = [this, nextSeason, seasonsToLoad, episodeIds]( - QString html, ScraperError error) { + // Load all episodes in bulk via GraphQL — one request for up to 250 episodes. + // This replaces the old sequential per-episode loading pattern. 
+ m_api.loadEpisodesViaGraphQL(m_showId, 250, [this](QString data, ScraperError error) { if (error.hasError()) { setScraperError(error); emitFinished(); return; } - QMap episodesForSeason = ImdbTvSeasonParser::parseEpisodeIds(html, nextSeason.toInt()); - auto ids = episodeIds; - ids.insert(nextSeason, episodesForSeason); - gatherAndLoadEpisodes(seasonsToLoad, ids); - }; - - m_api.loadSeason(config().locale, m_showId, nextSeason, callback); + parseAndStoreEpisodes(data); + emitFinished(); + }); } -void ImdbTvSeasonScrapeJob::loadAllSeasons() +void ImdbTvSeasonScrapeJob::parseAndStoreEpisodes(const QString& json) { - m_api.loadDefaultEpisodesPage(config().locale, m_showId, [this](QString html, ScraperError error) { - if (error.hasError()) { - setScraperError(error); - emitFinished(); - return; + const QVector episodes = ImdbJsonParser::parseEpisodesFromGraphQL(json, config().locale); + + for (const ImdbEpisodeData& epData : episodes) { + const SeasonNumber season(epData.seasonNumber); + const EpisodeNumber epNum(epData.episodeNumber); + + // Skip episodes from seasons we didn't request (unless loading all) + if (!config().shouldLoadAllSeasons() && !config().seasons.contains(season)) { + continue; } - QSet seasons = ImdbTvSeasonParser::parseSeasonNumbersFromEpisodesPage(html); - gatherAndLoadEpisodes(seasons.values(), {}); - }); + + auto* episode = new TvShowEpisode({}, this); + episode->setSeason(season); + episode->setEpisode(epNum); + episode->setImdbId(epData.imdbId); + + if (epData.title.hasValue()) { + episode->setTitle(epData.title.value); + } + if (epData.overview.hasValue()) { + episode->setOverview(epData.overview.value); + } + if (epData.firstAired.hasValue()) { + episode->setFirstAired(epData.firstAired.value); + } + if (epData.thumbnail.hasValue()) { + episode->setThumbnail(epData.thumbnail.value.thumbUrl); + } + for (const Rating& rating : epData.ratings) { + episode->ratings().addRating(rating); + } + if (epData.runtime.hasValue()) { + // TvShowEpisode 
doesn't have setRuntime — runtime is only on TvShow level + } + if (epData.certification.hasValue()) { + episode->setCertification(epData.certification.value); + } + if (!epData.directors.isEmpty()) { + episode->setDirectors(setToStringList(epData.directors)); + } + if (!epData.writers.isEmpty()) { + episode->setWriters(setToStringList(epData.writers)); + } + for (const Actor& actor : epData.actors) { + episode->addActor(actor); + } + + storeEpisode(episode); + } } void ImdbTvSeasonScrapeJob::storeEpisode(TvShowEpisode* episode) @@ -127,7 +105,6 @@ void ImdbTvSeasonScrapeJob::storeEpisode(TvShowEpisode* episode) if (config().shouldLoadAllSeasons() || config().seasons.contains(season)) { m_episodes[{season, episode->episodeNumber()}] = episode; } else { - // Only store episodes that are actually requested. episode->deleteLater(); } } diff --git a/src/scrapers/tv_show/imdb/ImdbTvSeasonScrapeJob.h b/src/scrapers/tv_show/imdb/ImdbTvSeasonScrapeJob.h index 358bbc179c..fea5f0f880 100644 --- a/src/scrapers/tv_show/imdb/ImdbTvSeasonScrapeJob.h +++ b/src/scrapers/tv_show/imdb/ImdbTvSeasonScrapeJob.h @@ -1,7 +1,6 @@ #pragma once #include "scrapers/tv_show/SeasonScrapeJob.h" -#include "scrapers/tv_show/imdb/ImdbTvEpisodeParser.h" #include @@ -20,15 +19,8 @@ class ImdbTvSeasonScrapeJob : public SeasonScrapeJob void doStart() override; private: - /// \brief Loads the given episodes in a sequential way - /// \todo Load in parallel. - void loadEpisodes(QMap> episodeIds); - /// \brief Gathers all episode IDs for the given seasons by loading each - /// season page and then calls loadEpisodes(). - void gatherAndLoadEpisodes(QList seasonsToLoad, - QMap> episodeIds); - void loadAllSeasons(); - /// \brief Store the given episode in the internal season-episode map. 
+ void loadEpisodes(); + void parseAndStoreEpisodes(const QString& json); void storeEpisode(TvShowEpisode* episode); private: diff --git a/src/scrapers/tv_show/imdb/ImdbTvShowParser.cpp b/src/scrapers/tv_show/imdb/ImdbTvShowParser.cpp deleted file mode 100644 index d9b23ff0e6..0000000000 --- a/src/scrapers/tv_show/imdb/ImdbTvShowParser.cpp +++ /dev/null @@ -1,76 +0,0 @@ -#include "scrapers/tv_show/imdb/ImdbTvShowParser.h" - -#include "data/tv_show/TvShow.h" -#include "log/Log.h" -#include "scrapers/ScraperInterface.h" -#include "scrapers/ScraperUtils.h" - -#include -#include -#include -#include -#include -#include - -#include "scrapers/imdb/ImdbJsonParser.h" -#include "utils/Containers.h" - -using namespace std::chrono_literals; - -namespace mediaelch { -namespace scraper { - -ScraperError ImdbTvShowParser::parseInfos(const QString& html) -{ - ImdbData data = ImdbJsonParser::parseFromReferencePage(html, m_preferredLocale); - - if (data.imdbId.isValid()) { - m_show.setImdbId(data.imdbId); - } - if (data.title.hasValue()) { - m_show.setTitle(data.title.value); - } - if (data.originalTitle.hasValue()) { - m_show.setOriginalTitle(data.originalTitle.value); - } - - if (data.outline.hasValue()) { - // TODO: We use the outline for the overview; at the moment, we don't distinguish them in TV shows. 
- m_show.setOverview(data.outline.value); - } else if (data.overview.hasValue()) { - m_show.setOverview(data.overview.value); - } - - if (data.released.hasValue()) { - m_show.setFirstAired(data.released.value); - } - for (Rating rating : data.ratings) { - m_show.ratings().addRating(rating); - } - if (data.top250.hasValue()) { - m_show.setTop250(data.top250.value); - } - if (data.certification.hasValue()) { - m_show.setCertification(data.certification.value); - } - for (const Actor& actor : data.actors) { - m_show.addActor(actor); - } - for (const QString& keyword : data.keywords) { - m_show.addTag(keyword); - } - if (data.poster.hasValue()) { - m_show.addPoster(data.poster.value); - } - for (const QString& genre : data.genres) { - m_show.addGenre(genre); - } - if (data.runtime.hasValue()) { - m_show.setRuntime(data.runtime.value); - } - - return ScraperError{}; -} - -} // namespace scraper -} // namespace mediaelch diff --git a/src/scrapers/tv_show/imdb/ImdbTvShowParser.h b/src/scrapers/tv_show/imdb/ImdbTvShowParser.h deleted file mode 100644 index 8fd7f947b6..0000000000 --- a/src/scrapers/tv_show/imdb/ImdbTvShowParser.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include "scrapers/ScraperError.h" -#include "utils/Meta.h" - -#include -#include -#include -#include - -#include "data/Locale.h" - -class TvShow; - -namespace mediaelch { - -namespace scraper { - -class ImdbTvShowParser : public QObject -{ - Q_OBJECT - -public: - ImdbTvShowParser(TvShow& show, Locale preferredLocale) : m_show{show}, m_preferredLocale{std::move(preferredLocale)} - { - } - - ELCH_NODISCARD ScraperError parseInfos(const QString& html); - -private: - TvShow& m_show; - Locale m_preferredLocale; -}; - -} // namespace scraper -} // namespace mediaelch diff --git a/src/scrapers/tv_show/imdb/ImdbTvShowScrapeJob.cpp b/src/scrapers/tv_show/imdb/ImdbTvShowScrapeJob.cpp index aee99d3431..fe2f4c28a4 100644 --- a/src/scrapers/tv_show/imdb/ImdbTvShowScrapeJob.cpp +++ 
b/src/scrapers/tv_show/imdb/ImdbTvShowScrapeJob.cpp @@ -2,6 +2,8 @@ #include "data/tv_show/TvShow.h" #include "log/Log.h" +#include "scrapers/imdb/ImdbJsonParser.h" +#include "utils/Containers.h" #include @@ -9,19 +11,7 @@ namespace mediaelch { namespace scraper { ImdbTvShowScrapeJob::ImdbTvShowScrapeJob(ImdbApi& api, ShowScrapeJob::Config _config, QObject* parent) : - ShowScrapeJob(_config, parent), - m_api{api}, - m_parser(tvShow(), _config.locale), - m_notLoaded{ShowScraperInfo::Title, - ShowScraperInfo::Genres, - ShowScraperInfo::Certification, - ShowScraperInfo::Overview, - ShowScraperInfo::Rating, - ShowScraperInfo::Tags, - ShowScraperInfo::Runtime, - ShowScraperInfo::FirstAired, - ShowScraperInfo::Poster}, - m_id{config().identifier.str()} + ShowScrapeJob(_config, parent), m_api{api}, m_id{config().identifier.str()} { } @@ -37,64 +27,75 @@ void ImdbTvShowScrapeJob::doStart() return; } tvShow().setImdbId(m_id); - // TV Show data is always loaded. - loadTvShow(); -} - -void ImdbTvShowScrapeJob::loadTvShow() -{ - const auto setInfosLoaded = [this]() { - const QSet availableScraperInfos = {ShowScraperInfo::Title, - ShowScraperInfo::Genres, - ShowScraperInfo::Certification, - ShowScraperInfo::Overview, - ShowScraperInfo::Rating, - ShowScraperInfo::Tags, - ShowScraperInfo::Runtime, - ShowScraperInfo::FirstAired, - ShowScraperInfo::Poster}; - for (const auto loaded : availableScraperInfos) { - if (shouldLoad(loaded)) { - setIsLoaded(loaded); - } - } - }; - const auto callback = [this, setInfosLoaded](QString html, ScraperError error) { - if (!error.hasError()) { - // We need to add the loaded information but may not want to actually store the show's information. 
- error = m_parser.parseInfos(html); - } + m_api.loadTitleViaGraphQL(m_id, [this](QString data, ScraperError error) { if (error.hasError()) { setScraperError(error); + emitFinished(); + return; } - setInfosLoaded(); - checkIfDone(); - }; - - m_api.loadTitle(config().locale, m_id, ImdbApi::PageKind::Reference, callback); + parseAndAssignInfos(data); + emitFinished(); + }); } - -bool ImdbTvShowScrapeJob::shouldLoad(ShowScraperInfo info) +void ImdbTvShowScrapeJob::parseAndAssignInfos(const QString& json) { - return m_notLoaded.contains(info); -} + ImdbData data = ImdbJsonParser::parseFromGraphQL(json, config().locale); -void ImdbTvShowScrapeJob::setIsLoaded(ShowScraperInfo info) -{ - if (m_notLoaded.contains(info)) { - m_notLoaded.remove(info); - } else { - qCCritical(generic) << "[ImdbTvShowScrapeJob] Loaded detail that should not be loaded?" - << static_cast(info); + if (data.imdbId.isValid()) { + tvShow().setImdbId(data.imdbId); } -} -void ImdbTvShowScrapeJob::checkIfDone() -{ - if (m_notLoaded.isEmpty()) { - emitFinished(); + // Title: use localized title if available + if (data.localizedTitle.hasValue()) { + tvShow().setTitle(data.localizedTitle.value); + if (data.originalTitle.hasValue()) { + tvShow().setOriginalTitle(data.originalTitle.value); + } else if (data.title.hasValue()) { + tvShow().setOriginalTitle(data.title.value); + } + } else if (data.title.hasValue()) { + tvShow().setTitle(data.title.value); + if (data.originalTitle.hasValue()) { + tvShow().setOriginalTitle(data.originalTitle.value); + } + } + + if (data.overview.hasValue()) { + tvShow().setOverview(data.overview.value); + } + if (data.certification.hasValue()) { + tvShow().setCertification(data.certification.value); + } + if (data.released.hasValue()) { + tvShow().setFirstAired(data.released.value); + } + if (data.runtime.hasValue()) { + tvShow().setRuntime(data.runtime.value); + } + for (const Rating& rating : data.ratings) { + tvShow().ratings().addRating(rating); + } + for (const QString& 
genre : data.genres) { + tvShow().addGenre(genre); + } + for (const QString& keyword : data.keywords) { + tvShow().addTag(keyword); + } + for (const Actor& actor : data.actors) { + tvShow().addActor(actor); + } + if (data.poster.hasValue()) { + tvShow().addPoster(data.poster.value); + } + for (const Poster& backdrop : data.backdrops) { + tvShow().addBackdrop(backdrop); + } + // Note: IMDB GraphQL API has no dedicated "network" field for TV shows. + // Use TMDb in the Custom TV Scraper for network information. + if (data.isOngoing.hasValue()) { + tvShow().setStatus(data.isOngoing.value ? "Continuing" : "Ended"); } } diff --git a/src/scrapers/tv_show/imdb/ImdbTvShowScrapeJob.h b/src/scrapers/tv_show/imdb/ImdbTvShowScrapeJob.h index 669539aba8..d68ae2d108 100644 --- a/src/scrapers/tv_show/imdb/ImdbTvShowScrapeJob.h +++ b/src/scrapers/tv_show/imdb/ImdbTvShowScrapeJob.h @@ -2,7 +2,6 @@ #include "scrapers/imdb/ImdbApi.h" #include "scrapers/tv_show/ShowScrapeJob.h" -#include "scrapers/tv_show/imdb/ImdbTvShowParser.h" namespace mediaelch { namespace scraper { @@ -17,17 +16,10 @@ class ImdbTvShowScrapeJob : public ShowScrapeJob void doStart() override; private: - void loadTvShow(); - - bool shouldLoad(ShowScraperInfo info); - void setIsLoaded(ShowScraperInfo info); - void checkIfDone(); + void parseAndAssignInfos(const QString& json); private: ImdbApi& m_api; - ImdbTvShowParser m_parser; - QSet m_notLoaded; - QSet m_supports; ImdbId m_id; }; diff --git a/src/scrapers/tv_show/imdb/ImdbTvShowSearchJob.cpp b/src/scrapers/tv_show/imdb/ImdbTvShowSearchJob.cpp index bf2bd1f5fc..1c3a4c9902 100644 --- a/src/scrapers/tv_show/imdb/ImdbTvShowSearchJob.cpp +++ b/src/scrapers/tv_show/imdb/ImdbTvShowSearchJob.cpp @@ -1,13 +1,11 @@ #include "scrapers/tv_show/imdb/ImdbTvShowSearchJob.h" -#include "data/tv_show/TvShow.h" -#include "scrapers/ScraperUtils.h" -#include "scrapers/tv_show/imdb/ImdbTvShowParser.h" - -#include - +#include "log/Log.h" #include "scrapers/imdb/ImdbSearchPage.h" 
+#include +#include + namespace mediaelch { namespace scraper { @@ -25,26 +23,16 @@ void ImdbTvShowSearchJob::doStart() } } - void ImdbTvShowSearchJob::searchViaImdbId() { MediaElch_Debug_Ensures(ImdbId::isValidFormat(config().query)); - ImdbId id = ImdbId(config().query); - m_api.loadTitle(config().locale, id, ImdbApi::PageKind::Reference, [this](QString html, ScraperError error) { - if (!error.hasError()) { - TvShow show; - ImdbTvShowParser parser(show, config().locale); - error = parser.parseInfos(html); - if (!error.hasError() && !show.title().isEmpty()) { - ShowSearchJob::Result result; - result.title = show.title(); - result.identifier = ShowIdentifier(config().query); - result.released = show.firstAired(); - m_results.push_back(std::move(result)); - } + m_api.loadTitleViaGraphQL(ImdbId(config().query), [this](QString data, ScraperError error) { + if (error.hasError()) { + setScraperError(error); + } else { + parseGraphQLResult(data); } - setScraperError(error); emitFinished(); }); } @@ -53,57 +41,60 @@ void ImdbTvShowSearchJob::searchViaQuery() { MediaElch_Debug_Ensures(!ImdbId::isValidFormat(config().query)); - m_api.searchForShow(config().locale, config().query, [this](QString html, ScraperError error) { + m_api.suggestSearch(config().query, [this](QString data, ScraperError error) { if (error.hasError()) { - // pass; already set - } else if (html.isEmpty()) { - error.error = ScraperError::Type::NetworkError; - error.message = tr("Loaded IMDb web page content is empty. Cannot scrape requested TV show."); - - } else if (is404(html)) { - error.error = ScraperError::Type::InternalError; - error.message = tr("Could not find result table in the scraped HTML. " - "Please contact MediaElch's developers."); - + setScraperError(error); + } else if (data.isEmpty()) { + ScraperError emptyError; + emptyError.error = ScraperError::Type::NetworkError; + emptyError.message = tr("Loaded IMDb suggest response is empty. 
Cannot scrape requested TV show."); + setScraperError(emptyError); } else { - m_results = parseSearch(html); + parseSuggestResults(data); } - setScraperError(error); emitFinished(); }); } -QVector ImdbTvShowSearchJob::parseSearch(const QString& html) +void ImdbTvShowSearchJob::parseSuggestResults(const QString& json) { - auto results = ImdbSearchPage::parseSearch(html); - QVector showResults; + const QStringList tvTypes{"tvSeries", "tvMiniSeries"}; + auto results = ImdbSearchPage::parseSuggestResponse(json, tvTypes); for (const auto& result : results) { - showResults << ShowSearchJob::Result{result.title, result.released, ShowIdentifier{result.identifier}}; + m_results << ShowSearchJob::Result{result.title, result.released, ShowIdentifier{result.identifier}}; } - return showResults; } -QVector ImdbTvShowSearchJob::parseResultFromShowPage(const QString& html) +void ImdbTvShowSearchJob::parseGraphQLResult(const QString& json) { - QRegularExpression rx(R"(([^<]+?) \(TV Series (\d{4})–(\d{4}| )\) - IMDb)"); - QRegularExpressionMatch match = rx.match(html); - if (!match.hasMatch()) { - return {}; + QJsonParseError parseError{}; + const QJsonDocument doc = QJsonDocument::fromJson(json.toUtf8(), &parseError); + if (parseError.error != QJsonParseError::NoError) { + qCWarning(generic) << "[ImdbTvShowSearchJob] JSON parse error:" << parseError.errorString(); + return; + } + + const QJsonObject title = doc.object().value("data").toObject().value("title").toObject(); + if (title.isEmpty()) { + return; } ShowSearchJob::Result result; result.identifier = ShowIdentifier(config().query); - result.title = match.captured(1); - result.released = QDate::fromString(match.captured(2), "yyyy"); - - return {result}; -} + result.title = title.value("titleText").toObject().value("text").toString(); + + const QJsonObject releaseDate = title.value("releaseDate").toObject(); + const int year = releaseDate.value("year").toInt(0); + if (year > 0) { + const int month = 
releaseDate.value("month").toInt(1); + const int day = releaseDate.value("day").toInt(1); + result.released = QDate(year, month, day); + } -bool ImdbTvShowSearchJob::is404(const QString& html) const -{ - return QRegularExpression(R"(404 Error)").match(html).hasMatch(); + if (!result.title.isEmpty()) { + m_results << result; + } } - } // namespace scraper } // namespace mediaelch diff --git a/src/scrapers/tv_show/imdb/ImdbTvShowSearchJob.h b/src/scrapers/tv_show/imdb/ImdbTvShowSearchJob.h index df9c4bd005..502409ad3b 100644 --- a/src/scrapers/tv_show/imdb/ImdbTvShowSearchJob.h +++ b/src/scrapers/tv_show/imdb/ImdbTvShowSearchJob.h @@ -19,12 +19,8 @@ class ImdbTvShowSearchJob : public ShowSearchJob void searchViaImdbId(); void searchViaQuery(); - QVector<ShowSearchJob::Result> parseSearch(const QString& html); - QVector<ShowSearchJob::Result> parseResultFromShowPage(const QString& html); - /// \brief Check if the HTML page is a 404 page - /// \details IMDb does not return a 404 status code but instead a 204 one with - /// a page that says "404 Error". 
- bool is404(const QString& html) const; + void parseSuggestResults(const QString& json); + void parseGraphQLResult(const QString& json); private: ImdbApi& m_api; diff --git a/src/ui/scrapers/movie/ImdbMovieConfigurationView.cpp b/src/ui/scrapers/movie/ImdbMovieConfigurationView.cpp index 6ddab2b7da..4a41136f07 100644 --- a/src/ui/scrapers/movie/ImdbMovieConfigurationView.cpp +++ b/src/ui/scrapers/movie/ImdbMovieConfigurationView.cpp @@ -8,14 +8,29 @@ namespace scraper { ImdbMovieConfigurationView::ImdbMovieConfigurationView(ImdbMovieConfiguration& settings, QWidget* parent) : QWidget(parent), m_settings(settings) { + m_languageBox = new LanguageCombo(this); + m_languageBox->setupLanguages(m_settings.supportedLanguages(), m_settings.language()); + m_chkAllTags = new QCheckBox(tr("Load all tags"), this); m_chkAllTags->setObjectName("chkAllTags"); auto* layout = new QGridLayout(this); - layout->addWidget(m_chkAllTags, 0, 0); + layout->addWidget(new QLabel(tr("Language")), 0, 0); + layout->addWidget(m_languageBox, 0, 1); + layout->addWidget(m_chkAllTags, 1, 0, 1, 2); + layout->setColumnStretch(2, 1); + layout->setContentsMargins(12, 0, 12, 12); m_chkAllTags->setChecked(m_settings.shouldLoadAllTags()); + connect(m_languageBox, &LanguageCombo::languageChanged, this, [this]() { + m_settings.setLanguage(m_languageBox->currentLocale()); + }); + connect(&m_settings, &ImdbMovieConfiguration::languageChanged, this, [this](Locale language) { + const bool blocked = m_languageBox->blockSignals(true); + m_languageBox->setLanguage(language); + m_languageBox->blockSignals(blocked); + }); connect(m_chkAllTags, &QCheckBox::toggled, this, [this](bool activated) { // m_settings.setLoadAllTags(activated); }); diff --git a/src/ui/scrapers/movie/ImdbMovieConfigurationView.h b/src/ui/scrapers/movie/ImdbMovieConfigurationView.h index 5e1c3e71fc..eab687d710 100644 --- a/src/ui/scrapers/movie/ImdbMovieConfigurationView.h +++ b/src/ui/scrapers/movie/ImdbMovieConfigurationView.h @@ -1,6 +1,7 @@ 
#pragma once #include "scrapers/movie/imdb/ImdbMovieConfiguration.h" +#include "ui/small_widgets/LanguageCombo.h" #include <QCheckBox> #include <QPointer> @@ -20,6 +21,7 @@ class ImdbMovieConfigurationView : public QWidget private: ImdbMovieConfiguration& m_settings; + LanguageCombo* m_languageBox = nullptr; QCheckBox* m_chkAllTags = nullptr; }; diff --git a/test/resources/scrapers/imdb/Finding_Dory_tt2277860.ref.txt b/test/resources/scrapers/imdb/Finding_Dory_tt2277860.ref.txt index 1283f5f5b2..9f58c512a4 100644 --- a/test/resources/scrapers/imdb/Finding_Dory_tt2277860.ref.txt +++ b/test/resources/scrapers/imdb/Finding_Dory_tt2277860.ref.txt @@ -201,8 +201,8 @@ released: 2016-06-17 runtime: 97min writer: Andrew Stanton, Victoria Strouse director: Angus MacLane, Andrew Stanton -actors: (N>5) - - id: +actors: (N>60) + - id: nm0001122 name: Ellen DeGeneres role: Dory thumb: @@ -210,7 +210,7 @@ actors: (N>5) Mw@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0000983 name: Albert Brooks role: Marlin thumb: @@ -218,7 +218,7 @@ actors: (N>5) NQ@@.jpg order: 1 imageHasChanged: false - - id: + - id: nm0642145 name: Ed O'Neill role: Hank thumb: @@ -226,7 +226,7 @@ actors: (N>5) NzE@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0647698 name: Kaitlin Olson role: Destiny thumb: @@ -234,7 +234,7 @@ actors: (N>5) .jpg order: 3 imageHasChanged: false - - id: + - id: nm5348776 name: Hayden Rolence role: Nemo thumb: @@ -242,7 +242,7 @@ actors: (N>5) OTE@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0123092 name: Ty Burrell role: Bailey thumb: @@ -250,7 +250,7 @@ actors: (N>5) NA@@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0000473 name: Diane Keaton role: Jenny thumb: @@ -258,7 +258,7 @@ actors: (N>5) NzM@.jpg order: 6 imageHasChanged: false - - id: + - id: nm0506405 name: Eugene Levy role: Charlie thumb: @@ -266,13 +266,13 @@ actors: (N>5) .jpg order: 7 imageHasChanged: false - - id: + - id: nm8204463 name: Sloane Murray role: Young Dory thumb: 
order: 8 imageHasChanged: false - - id: + - id: nm0252961 name: Idris Elba role: Fluke thumb: @@ -280,6 +280,81 @@ actors: (N>5) OA@@.jpg order: 9 imageHasChanged: false + - id: nm0922035 + name: Dominic West + role: Rudder + thumb: + https://m.media-amazon.com/images/M/MV5BMjM1MDU1Mzg3N15BMl5BanBnXkFtZTgwNTcwNzcy + MzI@.jpg + order: 10 + imageHasChanged: false + - id: nm0677037 + name: Bob Peterson + role: Mr. Ray + thumb: + https://m.media-amazon.com/images/M/MV5BMTU4NTQ4ODI5N15BMl5BanBnXkFtZTYwNzIwODA3 + .jpg + order: 11 + imageHasChanged: false + - id: nm0571952 + name: Kate McKinnon + role: Wife Fish + thumb: + https://m.media-amazon.com/images/M/MV5BMjQwMzEwMDQ2NF5BMl5BanBnXkFtZTgwMzU4NTY4 + NjE@.jpg + order: 12 + imageHasChanged: false + - id: nm0352778 + name: Bill Hader + role: Husband Fish (Stan) + thumb: + https://m.media-amazon.com/images/M/MV5BNTY3MzgwMjE3N15BMl5BanBnXkFtZTcwNjc2MjE3 + NA@@.jpg + order: 13 + imageHasChanged: false + - id: nm0000244 + name: Sigourney Weaver + role: Sigourney Weaver + thumb: + https://m.media-amazon.com/images/M/MV5BMTk1MTcyNTE3OV5BMl5BanBnXkFtZTcwMTA0MTMy + Mw@@.jpg + order: 14 + imageHasChanged: false + - id: nm1071252 + name: Alexander Gould + role: Passenger Carl + thumb: + https://m.media-amazon.com/images/M/MV5BNzBlOWI2MDEtNjhkMi00ZjAxLWFhMTctYjMzYWYz + MjI2MjBlXkEyXkFqcGc@.jpg + order: 15 + imageHasChanged: false + - id: nm0120187 + name: Torbin Xan Bullock + role: Gerald + thumb: + order: 16 + imageHasChanged: false + - id: nm0004056 + name: Andrew Stanton + role: Crush + thumb: + https://m.media-amazon.com/images/M/MV5BMmZiOTE4NDktMmZjNi00MzcwLWJjMzAtYWVlZDUw + NjhiMGIwXkEyXkFqcGc@.jpg + order: 17 + imageHasChanged: false + - id: nm0727611 + name: Katherine Ringgold + role: Chickenfish + thumb: + order: 18 + imageHasChanged: false + - id: nm8204464 + name: Lucia Geddes + role: Tween Dory + thumb: + order: 19 + imageHasChanged: false + - ... 
and >40 more certification: PG genres: (N>5) - Animation @@ -288,18 +363,19 @@ genres: (N>5) - Adventure - Mystery - ... and =1 more -tags: (N<6) - - family relationships - - whale - - father son relationship - - ocean - - fish +tags: (N>100) + - animal character name in title + - talking in sleep + - billion dollar club + - animals family + - 3 dimensional + - ... and >90 more countries: (N=1) - US studios: (N<6) - Walt Disney Pictures - Pixar Animation Studios -trailer: https://imdb-video.media-imdb.com/vi2669917209/1434659454657-dx9ykf-1464098554017.mp4?Expires=1774010682&Signature=QORVKTDOsjazLU8fTevIduj3ja3EGwdu6i89FIVjS9pPXNuX1x4jlj1NrSgOS7FBC1eEEd-xelPdvqShM7F8gyoiXIQoCUO3ElLnAH2NCZO5UCf8XGEVBAT2zcQNvgomA~72xnMX1PbhcI-kcIHhvVzsOyY0Ni15pKt6GjZSd4uV7Rgq70nPxuLi0aiDSG2k-ezYg8WDrwwxHql9Eex0taFFxYJURGE-fO~oQUKWYJAWQkfNQWp6~1wQXAiELZzzpodObnPIsyDn86loGaO2Sn3sfDvm8bAzCiQx5GKn1gar7bhTPVctWA6HeEVsTbwCcJKO~fSu5lq7W3y0hFNQSg__&Key-Pair-Id=APKAIFLZBVQZ24NQH3KA +trailer: https://www.imdb.com/video/vi2669917209/ showlink: (N=0) playcount: 0 lastPlayed: <not set or invalid> diff --git a/test/resources/scrapers/imdb/Godfather_tt0068646.ref.txt b/test/resources/scrapers/imdb/Godfather_tt0068646.ref.txt index 83bd6264ab..0a9014096c 100644 --- a/test/resources/scrapers/imdb/Godfather_tt0068646.ref.txt +++ b/test/resources/scrapers/imdb/Godfather_tt0068646.ref.txt @@ -83,13 +83,13 @@ ratings (N<6) source=imdb | rating=9.2 | votes=2200000 | min=0 | max=10 source=metacritic | rating=100 | votes=0 | min=0 | max=100 userRating: 0 -imdbTop250: 2 +imdbTop250: -1 released: 1972-03-24 runtime: 175min writer: Francis Ford Coppola, Mario Puzo director: Francis Ford Coppola -actors: (N>5) - - id: +actors: (N>80) + - id: nm0000008 name: Marlon Brando role: Don Vito Corleone thumb: @@ -97,7 +97,7 @@ actors: (N>5) NA@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0000199 name: Al Pacino role: Michael thumb: @@ -105,7 +105,7 @@ actors: (N>5) .jpg order: 1 imageHasChanged: false - - 
id: + - id: nm0001001 name: James Caan role: Sonny thumb: @@ -113,7 +113,7 @@ actors: (N>5) Mw@@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0144710 name: Richard S. Castellano role: Clemenza thumb: @@ -121,7 +121,7 @@ actors: (N>5) OA@@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0000380 name: Robert Duvall role: Tom Hagen thumb: @@ -129,7 +129,7 @@ actors: (N>5) Mg@@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0001330 name: Sterling Hayden role: Capt. McCluskey thumb: @@ -137,7 +137,7 @@ actors: (N>5) Mg@@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0549134 name: John Marley role: Jack Woltz thumb: @@ -145,7 +145,7 @@ actors: (N>5) OA@@.jpg order: 6 imageHasChanged: false - - id: + - id: nm0002017 name: Richard Conte role: Barzini thumb: @@ -153,7 +153,7 @@ actors: (N>5) OA@@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0504803 name: Al Lettieri role: Sollozzo thumb: @@ -161,7 +161,7 @@ actors: (N>5) ODdhNjQ1XkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0000473 name: Diane Keaton role: Kay Adams thumb: @@ -169,23 +169,103 @@ actors: (N>5) NzM@.jpg order: 9 imageHasChanged: false + - id: nm0001820 + name: Abe Vigoda + role: Tessio + thumb: + https://m.media-amazon.com/images/M/MV5BMjE1MDk5NzMyN15BMl5BanBnXkFtZTYwMjA4Mjg1 + .jpg + order: 10 + imageHasChanged: false + - id: nm0001735 + name: Talia Shire + role: Connie + thumb: + https://m.media-amazon.com/images/M/MV5BMTkwMzc0NjQzNV5BMl5BanBnXkFtZTYwNzM0NTk3 + .jpg + order: 11 + imageHasChanged: false + - id: nm0751625 + name: Gianni Russo + role: Carlo + thumb: + https://m.media-amazon.com/images/M/MV5BNTgyMTgxODM4MV5BMl5BanBnXkFtZTcwNDg5NDYw + OA@@.jpg + order: 12 + imageHasChanged: false + - id: nm0001030 + name: John Cazale + role: Fredo + thumb: + https://m.media-amazon.com/images/M/MV5BMTUzMTM1MjI5NV5BMl5BanBnXkFtZTcwMTM5NTM1 + Mw@@.jpg + order: 13 + imageHasChanged: false + - id: nm0094036 + name: Rudy Bond + role: Cuneo + thumb: + 
https://m.media-amazon.com/images/M/MV5BZDNlZWQzOTQtZTAxNy00NjUxLWIzN2MtMDVlN2Rl + MzNlYzY0XkEyXkFqcGc@.jpg + order: 14 + imageHasChanged: false + - id: nm0553887 + name: Al Martino + role: Johnny Fontane + thumb: + https://m.media-amazon.com/images/M/MV5BMjMyMDk4MzYyMl5BMl5BanBnXkFtZTcwNzg5NDYw + OA@@.jpg + order: 15 + imageHasChanged: false + - id: nm0455088 + name: Morgana King + role: Mama Corleone + thumb: + https://m.media-amazon.com/images/M/MV5BODg5OTAxNDQzMl5BMl5BanBnXkFtZTgwOTM3ODIx + NjM@.jpg + order: 16 + imageHasChanged: false + - id: nm0598926 + name: Lenny Montana + role: Luca Brasi + thumb: + https://m.media-amazon.com/images/M/MV5BZWVkODE4NjktMDQ2NS00NGRlLTg4ZGUtOGFmNTJl + YWMxZmQ2XkEyXkFqcGc@.jpg + order: 17 + imageHasChanged: false + - id: nm0553908 + name: John Martino + role: Paulie Gatto + thumb: + https://m.media-amazon.com/images/M/MV5BMTUzNTgzNTg5MV5BMl5BanBnXkFtZTgwOTMzODU3 + NjE@.jpg + order: 18 + imageHasChanged: false + - id: nm0181128 + name: Salvatore Corsitto + role: Bonasera + thumb: + order: 19 + imageHasChanged: false + - ... and >60 more certification: R genres: (N<6) - Crime - Drama -tags: (N<6) +tags: (N>100) + - undertaker + - wedding - patriarch - - gangster - - crime family - - mafia - - organized crime + - heroin trafficking + - 1940s + - ... and >90 more countries: (N=1) - US studios: (N<6) - Albert S. 
Ruddy Productions - Paramount Pictures - Alfran Productions -trailer: https://imdb-video.media-imdb.com/vi1348706585/1434659529640-260ouz-1616202346191.mp4?Expires=1774010702&Signature=TGCxYbplTX2nAH5ntUSlLyFmVazEcrr5gDAYeoKel2XhHBYSdMn93f~Uda~oygC3D6jDd-YjLWlQXqvwbXM6WMglD9F66BSpXJdVTXGkknfAZye9pFsRVt2nuUR5uXKx6iot0Kc7jDNu6X60X9BK7OO1HASfApc4LyVyEy-dRSRQ1ukZSPCrwzsPEQEfNXJ7uhl3aPJAyTL2agTW-oRFPaM5NwxbaKPwPfvKk-7CoVchDHEC~57hxJ9va22NJ2Jwdf4h2gaCtlP1Qa53ENiJT8pkLnBBl7FU~Of-GE6W7bFSANxSwDjm5u2Q7-~Xmhn5EW-HKVQHJrQWBrQmyJ5AYw__&Key-Pair-Id=APKAIFLZBVQZ24NQH3KA +trailer: https://www.imdb.com/video/vi1348706585/ showlink: (N=0) playcount: 0 lastPlayed: <not set or invalid> diff --git a/test/resources/scrapers/imdb/Pacific_Rim_tt1663662.ref.txt b/test/resources/scrapers/imdb/Pacific_Rim_tt1663662.ref.txt index d1f78f69b0..f8b69face6 100644 --- a/test/resources/scrapers/imdb/Pacific_Rim_tt1663662.ref.txt +++ b/test/resources/scrapers/imdb/Pacific_Rim_tt1663662.ref.txt @@ -148,8 +148,8 @@ released: 2013-07-12 runtime: 131min writer: Travis Beacham, Guillermo del Toro director: Guillermo del Toro -actors: (N>5) - - id: +actors: (N>90) + - id: nm0402271 name: Charlie Hunnam role: Raleigh Becket thumb: @@ -157,7 +157,7 @@ actors: (N>5) NTgyMjFhXkEyXkFqcGc@.jpg order: 0 imageHasChanged: false - - id: + - id: nm1459109 name: Diego Klattenhoff role: Yancy Becket thumb: @@ -165,7 +165,7 @@ actors: (N>5) ODE@.jpg order: 1 imageHasChanged: false - - id: + - id: nm0252961 name: Idris Elba role: Stacker Pentecost thumb: @@ -173,7 +173,7 @@ actors: (N>5) OA@@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0452860 name: Rinko Kikuchi role: Mako Mori thumb: @@ -181,7 +181,7 @@ actors: (N>5) .jpg order: 3 imageHasChanged: false - - id: + - id: nm0206359 name: Charlie Day role: Dr. 
Newton Geiszler thumb: @@ -189,7 +189,7 @@ actors: (N>5) MzgyY2NiXkEyXkFqcGc@.jpg order: 4 imageHasChanged: false - - id: + - id: nm1218607 name: Burn Gorman role: Gottlieb thumb: @@ -197,7 +197,7 @@ actors: (N>5) ZjU3ZmQ3XkEyXkFqcGc@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0242882 name: Max Martini role: Herc Hansen thumb: @@ -205,7 +205,7 @@ actors: (N>5) MGFlMmUzXkEyXkFqcGc@.jpg order: 6 imageHasChanged: false - - id: + - id: nm2059117 name: Robert Kazinsky role: Chuck Hansen thumb: @@ -213,7 +213,7 @@ actors: (N>5) NzM@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0004286 name: Clifton Collins Jr. role: Ops Tendo Choi thumb: @@ -221,7 +221,7 @@ actors: (N>5) MQ@@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0000579 name: Ron Perlman role: Hannibal Chau thumb: @@ -229,24 +229,108 @@ actors: (N>5) Ng@@.jpg order: 9 imageHasChanged: false + - id: nm0377034 + name: Brad William Henke + role: Construction Foreman + thumb: + https://m.media-amazon.com/images/M/MV5BMTM4NTE3NjU1Nl5BMl5BanBnXkFtZTcwMjQwNzcy + MQ@@.jpg + order: 10 + imageHasChanged: false + - id: nm0132660 + name: Larry Joe Campbell + role: Construction Worker + thumb: + https://m.media-amazon.com/images/M/MV5BYjE4ZjI1YmMtM2ZiMS00YzJjLWJhOGUtYTk4NGM4 + N2NkMmRkXkEyXkFqcGc@.jpg + order: 11 + imageHasChanged: false + - id: nm3870544 + name: Mana Ashida + role: Young Mako + thumb: + https://m.media-amazon.com/images/M/MV5BYTBhMmM1Y2ItODdjMy00NWFkLWI2ODUtM2ExY2Fl + ODhiMzNmXkEyXkFqcGc@.jpg + order: 12 + imageHasChanged: false + - id: nm0782213 + name: Santiago Segura + role: Wizened Man + thumb: + https://m.media-amazon.com/images/M/MV5BMmUwZThhMDgtNWZlOS00OGM3LTlhYWMtYzM2ZTk3 + ODQ3OTZmXkEyXkFqcGc@.jpg + order: 13 + imageHasChanged: false + - id: nm0684187 + name: Joe Pingue + role: Captain Merrit + thumb: + https://m.media-amazon.com/images/M/MV5BYjRmNDc0Y2YtMjUwNS00NjYyLTg4NjAtZTkyNDky + ZWExY2E1XkEyXkFqcGc@.jpg + order: 14 + imageHasChanged: false + - id: nm0055712 + name: 
Milton Barnes + role: McTighe + thumb: + https://m.media-amazon.com/images/M/MV5BMWI3MDY4OWYtOTJkNi00M2NjLWFhY2ItNjliZjUx + YjQxZDU4XkEyXkFqcGc@.jpg + order: 15 + imageHasChanged: false + - id: nm0290860 + name: Brian Frank + role: 1st Officer + thumb: + https://m.media-amazon.com/images/M/MV5BZmUxZTBhYjgtZTI0ZS00OTg4LWIwMmUtMGE3Y2Nl + ZWYxOGM5XkEyXkFqcGc@.jpg + order: 16 + imageHasChanged: false + - id: nm1655889 + name: Ellen McLain + role: Gipsy Danger AI + thumb: + https://m.media-amazon.com/images/M/MV5BOTk4NjU4MDM1M15BMl5BanBnXkFtZTcwNjE3MTYw + NQ@@.jpg + order: 17 + imageHasChanged: false + - id: nm0288944 + name: David Fox + role: Old Man on Beach + thumb: + https://m.media-amazon.com/images/M/MV5BMTkzNzU1Njc1M15BMl5BanBnXkFtZTgwNDU0MDg3 + MjE@.jpg + order: 18 + imageHasChanged: false + - id: nm3174869 + name: Jake Goodman + role: Child + thumb: + https://m.media-amazon.com/images/M/MV5BMjIxMzk0ODMxMV5BMl5BanBnXkFtZTgwMjU2NTIw + MzE@.jpg + order: 19 + imageHasChanged: false + - ... and >70 more certification: PG-13 genres: (N<6) - Sci-Fi - Action - Adventure -tags: (N<6) - - human piloted robot - - pilot - - surrealism - - 2020s - - kaiju -countries: (N=1) +tags: (N>100) + - 2010s + - robot sci fi + - exoskeleton + - loss of brother + - male female friendship + - ... and >90 more +countries: (N<6) - MX + - US studios: (N<6) - Warner Bros. 
+ - Double Dare You (DDY) - Legendary Entertainment - Legendary Pictures -trailer: https://imdb-video.media-imdb.com/vi1369752345/1434659529640-260ouz-1632281728566.mp4?Expires=1774010695&Signature=okyiMQqUnXOb3ePwJJIZAzOfAzksoKsuM9f~zefavZqx00BoPNVD2lpuIzvocgRhUfHIgIRNDwtmPUQGpCRLvf02T8iVUEquFQ4Nmyjcml7WWtzpJg-i4bDH5HMMuTbETgD6FVQxY7jF-6Eg6eYdRBz4rA-JXlCnZkgrOnvXa2HzetM6kvmWk22~Y127cuS46-svj1PrfPDyJjY7OUpkb7lWafD3P2BVhZIHfUA~I~ntJqKjQwxxK3ojTlquL3vW-XgGXqLHQMCyn3v7-KdjfDpa1E-FRWHd-wGa~0ppH0uOhk-GgEvVUw4y6WiDh7X4SV7M~bXRGtx9Yp2mYwRI7g__&Key-Pair-Id=APKAIFLZBVQZ24NQH3KA +trailer: https://www.imdb.com/video/vi1369752345/ showlink: (N=0) playcount: 0 lastPlayed: <not set or invalid> diff --git a/test/resources/scrapers/imdb/The_Shawshank_Redemption_tt0111161.ref.txt b/test/resources/scrapers/imdb/The_Shawshank_Redemption_tt0111161.ref.txt index 2524d943af..e487c7e296 100644 --- a/test/resources/scrapers/imdb/The_Shawshank_Redemption_tt0111161.ref.txt +++ b/test/resources/scrapers/imdb/The_Shawshank_Redemption_tt0111161.ref.txt @@ -219,9 +219,9 @@ Red violates parole and leaves the halfway house, unconcerned sinc inally reunited on the beach of Zihuatanejo on the Pacific coast, and share a ha ppy embrace. outline: - A banker convicted of uxoricide forms a friendship over a quarter century with a - hardened convict, while maintaining his innocence and trying to remain hopeful - through simple compassion. + A wrongfully convicted banker forms a close friendship with a hardened convict o + ver a quarter century while retaining his humanity through simple acts of compas + sion. movie set: tmdbid= | name= movie set overview: tagline: Fear can hold you prisoner. Hope can set you free. 
@@ -229,13 +229,13 @@ ratings (N<6) source=imdb | rating=9.2 | votes=3100000 | min=0 | max=10 source=metacritic | rating=82 | votes=0 | min=0 | max=100 userRating: 0 -imdbTop250: 1 +imdbTop250: -1 released: 1994-10-14 runtime: 142min writer: Stephen King, Frank Darabont director: Frank Darabont -actors: (N>5) - - id: +actors: (N>70) + - id: nm0000209 name: Tim Robbins role: Andy Dufresne thumb: @@ -243,7 +243,7 @@ actors: (N>5) .jpg order: 0 imageHasChanged: false - - id: + - id: nm0000151 name: Morgan Freeman role: Ellis Boyd 'Red' Redding thumb: @@ -251,7 +251,7 @@ actors: (N>5) MQ@@.jpg order: 1 imageHasChanged: false - - id: + - id: nm0348409 name: Bob Gunton role: Warden Norton thumb: @@ -259,7 +259,7 @@ actors: (N>5) ZWIwMDJkXkEyXkFqcGc@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0006669 name: William Sadler role: Heywood thumb: @@ -267,7 +267,7 @@ actors: (N>5) Mw@@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0000317 name: Clancy Brown role: Captain Hadley thumb: @@ -275,7 +275,7 @@ actors: (N>5) Ng@@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0004743 name: Gil Bellows role: Tommy thumb: @@ -283,7 +283,7 @@ actors: (N>5) MzE@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0001679 name: Mark Rolston role: Bogs Diamond thumb: @@ -291,7 +291,7 @@ actors: (N>5) ZmMyZGRmXkEyXkFqcGc@.jpg order: 6 imageHasChanged: false - - id: + - id: nm0926235 name: James Whitmore role: Brooks Hatlen thumb: @@ -299,7 +299,7 @@ actors: (N>5) ZjUyNzRhXkEyXkFqcGc@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0218810 name: Jeffrey DeMunn role: 1946 D.A. 
thumb: @@ -307,7 +307,7 @@ actors: (N>5) OA@@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0104594 name: Larry Brandenburg role: Skeet thumb: @@ -315,20 +315,94 @@ actors: (N>5) ZmFlMDg5XkEyXkFqcGc@.jpg order: 9 imageHasChanged: false + - id: nm0321358 + name: Neil Giuntoli + role: Jigger + thumb: + https://m.media-amazon.com/images/M/MV5BMjI0OTUxNjIyNF5BMl5BanBnXkFtZTcwNDE0MDcw + OA@@.jpg + order: 10 + imageHasChanged: false + - id: nm0508742 + name: Brian Libby + role: Floyd + thumb: + https://m.media-amazon.com/images/M/MV5BMjI2NDYwNzU0NV5BMl5BanBnXkFtZTcwMjYwMTcw + OA@@.jpg + order: 11 + imageHasChanged: false + - id: nm0698998 + name: David Proval + role: Snooze + thumb: + https://m.media-amazon.com/images/M/MV5BMjE0MTU0NjU3Nl5BMl5BanBnXkFtZTYwNTgzNjY0 + .jpg + order: 12 + imageHasChanged: false + - id: nm0706554 + name: Joseph Ragno + role: Ernie + thumb: + https://m.media-amazon.com/images/M/MV5BMTQyNzMyNTUwNV5BMl5BanBnXkFtZTcwNjQ2MTcw + OA@@.jpg + order: 13 + imageHasChanged: false + - id: nm0161980 + name: Jude Ciccolella + role: Guard Mert + thumb: + https://m.media-amazon.com/images/M/MV5BYTFkYzlhMGQtZjYzYy00YmY0LTg3N2MtMDcwMTU0 + MzFjYjA0XkEyXkFqcGc@.jpg + order: 14 + imageHasChanged: false + - id: nm0005204 + name: Paul McCrane + role: Guard Trout + thumb: + https://m.media-amazon.com/images/M/MV5BMTI2Mjc1NzExN15BMl5BanBnXkFtZTYwNjUzMzQz + .jpg + order: 15 + imageHasChanged: false + - id: nm0086169 + name: Renee Blaine + role: Andy Dufresne's Wife + thumb: + order: 16 + imageHasChanged: false + - id: nm0542957 + name: Scott Mann + role: Glenn Quentin + thumb: + order: 17 + imageHasChanged: false + - id: nm0395612 + name: John Horton + role: 1946 Judge + thumb: + order: 18 + imageHasChanged: false + - id: nm2939075 + name: Gordon Greene + role: 1947 Parole Hearings Man + thumb: + order: 19 + imageHasChanged: false + - ... 
and >50 more certification: R genres: (N=1) - Drama -tags: (N<6) - - friendship between men - - based on the works of stephen king - - wrongful conviction - - escape from prison - - prison +tags: (N>100) + - adultery + - beach + - prison yard + - 1940s + - prison rape + - ... and >90 more countries: (N=1) - US studios: (N=1) - Castle Rock Entertainment -trailer: https://imdb-video.media-imdb.com/vi3877612057/1434659454657-dx9ykf-1616202333253.mp4?Expires=1774010686&Signature=L2RC6hQe5EPkAC5JIVh2YcP3SUGcEyzr9b2064Pz8WYeJs6Tn42ee5DSaZTWW3H~mR3~PuezcZ30ZeD8qAYM01zDoskqY4CnEP7T~AO~KulvSkEsv1k9DYItp4scRgpkTKpIstmWKIm4ANqpcfZOiww8f-h1wDdobc8a5cHNlBJ-XyP2ee3coTx1-SM7bGQfiZpNrrlfNZXT66CiQ1aVarEBOLzq-sPQrzUDe~5iuS60gEZNs0fk27lsYS-dTVF5xagRD4c4zsebOcsfFmNxMiX8HYFYFxoIKrVbmgqLqbDPzTVWfNyfmH93ODyrYjp7cmC~slqncSIzzWe3IhHY6Q__&Key-Pair-Id=APKAIFLZBVQZ24NQH3KA +trailer: https://www.imdb.com/video/vi3877612057/ showlink: (N=0) playcount: 0 lastPlayed: <not set or invalid> diff --git a/test/resources/scrapers/imdb/Welcome_Back_tt3159708.ref.txt b/test/resources/scrapers/imdb/Welcome_Back_tt3159708.ref.txt index b9cd9ab102..ce6bab2a16 100644 --- a/test/resources/scrapers/imdb/Welcome_Back_tt3159708.ref.txt +++ b/test/resources/scrapers/imdb/Welcome_Back_tt3159708.ref.txt @@ -118,8 +118,8 @@ released: 2015-09-04 runtime: 152min writer: Anees Bazmee, Raaj Shaandilyaa, Rajeev Kaul, Rajan Agarwal, Praful Parekh director: Anees Bazmee, Sushma Sunam -actors: (N>5) - - id: +actors: (N>30) + - id: nm1303433 name: John Abraham role: Ajay 'Ajju' Ghunghroo thumb: @@ -127,7 +127,7 @@ actors: (N>5) .jpg order: 0 imageHasChanged: false - - id: + - id: nm0438463 name: Anil Kapoor role: Sagar 'Majnu' Pandey thumb: @@ -135,15 +135,15 @@ actors: (N>5) MWFjZTAwXkEyXkFqcGc@.jpg order: 1 imageHasChanged: false - - id: + - id: nm0007113 name: Nana Patekar - role: Uday Shankar Shetty / Shankar Shetty + role: Uday Shankar Shetty thumb: 
https://m.media-amazon.com/images/M/MV5BMjI4MzQyODYwOF5BMl5BanBnXkFtZTgwMTcxMTIz OTE@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0712546 name: Paresh Rawal role: Dr. Ghunghroo thumb: @@ -151,7 +151,7 @@ actors: (N>5) OTE@.jpg order: 3 imageHasChanged: false - - id: + - id: nm1599046 name: Shruti Haasan role: Ranjana Shetty thumb: @@ -159,15 +159,15 @@ actors: (N>5) ZmJjOWJlXkEyXkFqcGc@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0438092 name: Dimple Kapadia - role: Poonam / Maharani Padmavati + role: Poonam thumb: https://m.media-amazon.com/images/M/MV5BNTc4MTE1NDU5MF5BMl5BanBnXkFtZTcwNDI2MTcx OA@@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0710211 name: Ranjeet Bedi role: Kapoor, 'the criminal' thumb: @@ -175,7 +175,7 @@ actors: (N>5) NjM4MDMyXkEyXkFqcGc@.jpg order: 6 imageHasChanged: false - - id: + - id: nm0787462 name: Naseeruddin Shah role: Wanted Bhai thumb: @@ -183,7 +183,7 @@ actors: (N>5) OTE@.jpg order: 7 imageHasChanged: false - - id: + - id: nm1832004 name: Shiney Ahuja role: Honey thumb: @@ -191,12 +191,93 @@ actors: (N>5) OTE@.jpg order: 8 imageHasChanged: false - - id: + - id: nm12574452 name: Ankita Srivastava - role: Babita / Rajkumari Chandini + role: Babita thumb: order: 9 imageHasChanged: false + - id: nm0439828 + name: Supriya Karnik + role: Payal Ghungroo + thumb: + https://m.media-amazon.com/images/M/MV5BOGViYjJmNzctOGJkMy00Nzg0LWI3NGItMWRlNjU3 + YjJkZjk3XkEyXkFqcGc@.jpg + order: 10 + imageHasChanged: false + - id: nm1056425 + name: Rajpal Naurang Yadav + role: The Tailor + thumb: + https://m.media-amazon.com/images/M/MV5BMTQ3MTI4MzUyNl5BMl5BanBnXkFtZTgwMjAzMTMz + OTE@.jpg + order: 11 + imageHasChanged: false + - id: nm0903423 + name: Neeraj Vora + role: Badshah Khan + thumb: + https://m.media-amazon.com/images/M/MV5BODgzNjYxMzMwNl5BMl5BanBnXkFtZTcwODIzNTE3 + Mw@@.jpg + order: 12 + imageHasChanged: false + - id: nm0451272 + name: Mushtaq Khan + role: Balu + thumb: + 
https://m.media-amazon.com/images/M/MV5BYzNiODA3OTktMjA2Ni00MGI4LThlMDAtOGNhNGFh + NzA0YTUyXkEyXkFqcGc@.jpg + order: 13 + imageHasChanged: false + - id: nm0409779 + name: Adi Irani + role: Advocate Harkesh Sahni + thumb: + https://m.media-amazon.com/images/M/MV5BY2MxODlhOTMtOGM1NC00MWIzLTllNWYtYjQ2ZmQ5 + YzBlOTQxXkEyXkFqcGc@.jpg + order: 14 + imageHasChanged: false + - id: nm0196400 + name: Snehal Dabi + role: Kakhil + thumb: + https://m.media-amazon.com/images/M/MV5BMmM5MmI0NzMtMjkzMC00ZWM1LWE5MGEtMjQzNTY5 + MDQ2NGIyXkEyXkFqcGc@.jpg + order: 15 + imageHasChanged: false + - id: nm10304343 + name: Amjad Qureshi + role: Ajju bhai friend + thumb: + https://m.media-amazon.com/images/M/MV5BMGVlYWEyNTMtMDQ3ZC00M2YyLWFjYTAtZDU0YzVi + N2EwMmQ1XkEyXkFqcGc@.jpg + order: 16 + imageHasChanged: false + - id: nm1617168 + name: Javed Rizvi + role: Ajju's dumb and deaf friend + thumb: + https://m.media-amazon.com/images/M/MV5BZmZmOWIyYjUtYmY5ZC00Yjc3LWE4NmItYTUxMzEw + OTI2ZTI0XkEyXkFqcGc@.jpg + order: 17 + imageHasChanged: false + - id: nm2689943 + name: Lauren Gottlieb + role: Special appearance in song '20-20' + thumb: + https://m.media-amazon.com/images/M/MV5BNGM0MmE1ZTAtNjY5YS00OTc5LTlkZDctZWUyYjIy + NDFjNDE2XkEyXkFqcGc@.jpg + order: 18 + imageHasChanged: false + - id: nm4427212 + name: Reema Debnath + role: Special appearance in song '20-20' + thumb: + https://m.media-amazon.com/images/M/MV5BMDNkMGQ0Y2UtYTBjZi00MDRkLThlMjAtOGJlNjFm + NGQ4NjRmXkEyXkFqcGc@.jpg + order: 19 + imageHasChanged: false + - ... 
and >10 more certification: Not Rated genres: (N<6) - Crime @@ -205,11 +286,12 @@ genres: (N<6) - Drama tags: (N=1) - husband wife relationship -countries: (N=1) +countries: (N<6) - IN + - AE studios: (N=1) - Base Industries Group -trailer: https://imdb-video.media-imdb.com/vi956348441/1434659529640-260ouz-1563738237646.mp4?Expires=1774010699&Signature=jRn3Kfoft~x1gq4DnA2h4Ig9h2ZSUboNL9x2AvGkIQRnPTkEV4vxcDUAsYgnSgC8rYtHTvoSS4I2u25sDU-JoRaETNQyIYWypKscWDH0c8piJkuuxP2Lc5~Q2fs04WKuja6dX-VM3ca3keN3GXpeEpyqdTJ7yQ5FtynglmaXpICMaNaHTrhvs2dFPM1M7YvURMoZCMyLVnhKfNo0~Df3Pvb6YarEoCg9iBv8vg3bt7tMlnpXPn75--GaJzfIXJ2R-Vyxnf9mZGgIbSphB1HZkMu8~hparcIPNBpCyRcIIdKwX~FwdOnxSIEP6bJJvDzpWlWqFQUDElFQGKd5nwGlkw__&Key-Pair-Id=APKAIFLZBVQZ24NQH3KA +trailer: https://www.imdb.com/video/vi956348441/ showlink: (N=0) playcount: 0 lastPlayed: <not set or invalid> diff --git a/test/resources/scrapers/imdbtv/All-in-the-Family-S01E01-tt0509891-minimal-details.ref.txt b/test/resources/scrapers/imdbtv/All-in-the-Family-S01E01-tt0509891-minimal-details.ref.txt index 4d354cb3ea..816d9876bd 100644 --- a/test/resources/scrapers/imdbtv/All-in-the-Family-S01E01-tt0509891-minimal-details.ref.txt +++ b/test/resources/scrapers/imdbtv/All-in-the-Family-S01E01-tt0509891-minimal-details.ref.txt @@ -27,18 +27,13 @@ directors: (N=1) playCount: 0 lastPlayed: <not set or invalid> firstAired: 1971-01-12 -tags: (N<6) - - season premiere - - brunch - - italian slur - - reference to cosmopolitan magazine - - series premiere +tags: (N=0) epBookmark: <not set or invalid> certification: TV-14 networks: (N=0) thumbnail: https://m.media-amazon.com/images/M/MV5BY2E5MDNhZmEtZmU4OS00ZGE1LTg5M2MtODg2MjJmMDU2MzliXkEyXkFqcGc@.jpg actors: (N<6) - - id: + - id: nm0005279 name: Carroll O'Connor role: Archie Bunker thumb: @@ -46,7 +41,7 @@ actors: (N<6) ODdkMDI2XkEyXkFqcGc@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0822958 name: Jean Stapleton role: Edith Bunker thumb: @@ -54,7 +49,7 @@ actors: (N<6) NQ@@.jpg order: 1 
imageHasChanged: false - - id: + - id: nm0001661 name: Rob Reiner role: Michael 'Meathead' Stivic thumb: @@ -62,7 +57,7 @@ actors: (N<6) Mw@@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0001783 name: Sally Struthers role: Gloria Bunker-Stivic thumb: @@ -70,7 +65,7 @@ actors: (N<6) NDIzMDFkXkEyXkFqcGc@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0263070 name: Mike Evans role: Lionel Jefferson thumb: diff --git a/test/resources/scrapers/imdbtv/Black-Mirror-S05.ref.txt b/test/resources/scrapers/imdbtv/Black-Mirror-S05.ref.txt index 50a82030b5..5944e851ee 100644 --- a/test/resources/scrapers/imdbtv/Black-Mirror-S05.ref.txt +++ b/test/resources/scrapers/imdbtv/Black-Mirror-S05.ref.txt @@ -30,18 +30,13 @@ directors: (N=1) playCount: 0 lastPlayed: <not set or invalid> firstAired: 2019-06-05 -tags: (N<6) - - virtual reality addiction - - gender roles - - virtual reality - - sexuality - - confusion +tags: (N=0) epBookmark: <not set or invalid> certification: TV-MA networks: (N=0) thumbnail: https://m.media-amazon.com/images/M/MV5BNWNkY2QwMmEtYTc3NS00MzdlLTgxMTItMWIzNWVlYTc3NGMzXkEyXkFqcGc@.jpg -actors: (N>5) - - id: +actors: (N>10) + - id: nm1107001 name: Anthony Mackie role: Danny thumb: @@ -49,7 +44,7 @@ actors: (N>5) Mg@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm5584344 name: Yahya Abdul-Mateen II role: Karl thumb: @@ -57,7 +52,7 @@ actors: (N>5) OTA3ZTI1XkEyXkFqcGc@.jpg order: 1 imageHasChanged: false - - id: + - id: nm2718512 name: Nicole Beharie role: Theo thumb: @@ -65,7 +60,7 @@ actors: (N>5) YTFjMzM1XkEyXkFqcGc@.jpg order: 2 imageHasChanged: false - - id: + - id: nm2962353 name: Pom Klementieff role: Roxette thumb: @@ -73,7 +68,7 @@ actors: (N>5) ZmM3ZjZhXkEyXkFqcGc@.jpg order: 3 imageHasChanged: false - - id: + - id: nm3485108 name: Ludi Lin role: Lance thumb: @@ -81,13 +76,13 @@ actors: (N>5) YTZhMzgyXkEyXkFqcGc@.jpg order: 4 imageHasChanged: false - - id: + - id: nm10744174 name: August Muschett role: Tyler thumb: order: 5 
imageHasChanged: false - - id: + - id: nm6826652 name: Fola Evans-Akingbola role: Mariella thumb: @@ -95,13 +90,13 @@ actors: (N>5) NzMzNDBmXkEyXkFqcGc@.jpg order: 6 imageHasChanged: false - - id: + - id: nm10723703 name: Monique Cynthia Brown role: Daisy thumb: order: 7 imageHasChanged: false - - id: + - id: nm7186319 name: Caroline Martin role: Jemma thumb: @@ -109,7 +104,7 @@ actors: (N>5) ZWE1ZDQ2XkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm1994167 name: Jordan Carlos role: Simon thumb: @@ -117,6 +112,32 @@ actors: (N>5) MzY2YTM0XkEyXkFqcGc@.jpg order: 9 imageHasChanged: false + - id: nm10744175 + name: Guilherme Vallim + role: Maxwell + thumb: + order: 10 + imageHasChanged: false + - id: nm6807611 + name: Eduardo Mossri + role: Waiter + thumb: + order: 11 + imageHasChanged: false + - id: nm4632043 + name: Austin Michael Young + role: Chester + thumb: + https://m.media-amazon.com/images/M/MV5BOGIyNGYxZTMtNzFiMi00YWI4LTg1MjEtYTczOTI1 + NGI2MDk0XkEyXkFqcGc@.jpg + order: 12 + imageHasChanged: false + - id: nm10750796 + name: Joe Parker + role: Jason + thumb: + order: 13 + imageHasChanged: false streamDetails: <not loaded> files: (N=0) @@ -151,18 +172,13 @@ directors: (N=1) playCount: 0 lastPlayed: <not set or invalid> firstAired: 2019-06-05 -tags: (N<6) - - ride share service - - stopped by police - - reflection in a rearview mirror - - camera shot of a closed eye - - bare chested male +tags: (N=0) epBookmark: <not set or invalid> certification: TV-MA networks: (N=0) thumbnail: https://m.media-amazon.com/images/M/MV5BNWM3YTEzZTMtOTRkMC00NzVkLTlhNzItNWFiOWYzMWU2NDI0XkEyXkFqcGc@.jpg -actors: (N>5) - - id: +actors: (N>20) + - id: nm0778831 name: Andrew Scott role: Chris Gillhaney thumb: @@ -170,7 +186,7 @@ actors: (N>5) Nw@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm5072010 name: Damson Idris role: Jaden thumb: @@ -178,7 +194,7 @@ actors: (N>5) ZDYxMjYzXkEyXkFqcGc@.jpg order: 1 imageHasChanged: false - - id: + - id: nm0333410 name: 
Topher Grace role: Billy Bauer thumb: @@ -186,7 +202,7 @@ actors: (N>5) Nw@@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0230826 name: Monica Dolan role: CS Linda Grace thumb: @@ -194,13 +210,13 @@ actors: (N>5) Mg@@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0237629 name: Amanda Drew role: Hayley thumb: order: 4 imageHasChanged: false - - id: + - id: nm3958526 name: Daniel Ings role: David Gilkes thumb: @@ -208,7 +224,7 @@ actors: (N>5) ZGM0ZTQ4XkEyXkFqcGc@.jpg order: 5 imageHasChanged: false - - id: + - id: nm5080543 name: Ruibo Qian role: Penelope Wu thumb: @@ -216,7 +232,7 @@ actors: (N>5) MGQwZThlXkEyXkFqcGc@.jpg order: 6 imageHasChanged: false - - id: + - id: nm7578896 name: Ambreen Razia role: WPC Najma Haque thumb: @@ -224,7 +240,7 @@ actors: (N>5) N2M2ODY1XkEyXkFqcGc@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0130127 name: Calum Callaghan role: PC Damien Bullen thumb: @@ -232,7 +248,7 @@ actors: (N>5) ZTYzNTcwXkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm2354102 name: Quincy Dunn-Baker role: Don thumb: @@ -240,6 +256,77 @@ actors: (N>5) NzQ4MjdhXkEyXkFqcGc@.jpg order: 9 imageHasChanged: false + - id: nm5661850 + name: Mirirai + role: Shonelle + thumb: + order: 10 + imageHasChanged: false + - id: nm5559283 + name: Laura Morgan + role: Thalia + thumb: + https://m.media-amazon.com/images/M/MV5BNmEyYzEyMmEtZDgxOS00ODJhLTkyNjctNzI4ZGIz + YmNiMmEwXkEyXkFqcGc@.jpg + order: 11 + imageHasChanged: false + - id: nm4545107 + name: Caitlin Innes Edwards + role: Hannah + thumb: + https://m.media-amazon.com/images/M/MV5BMjkzOTEyYTEtMDE3Yy00MTlkLWFlOTgtYWUyMGZl + MzM3ZThmXkEyXkFqcGc@.jpg + order: 12 + imageHasChanged: false + - id: nm2835896 + name: Maggie Bain + role: Maryam + thumb: + order: 13 + imageHasChanged: false + - id: nm7114805 + name: Conrad Khan + role: Dibbs + thumb: + https://m.media-amazon.com/images/M/MV5BYTExNjkzNTAtMzlhNS00NTcxLTljNzgtZTIzOGI3 + MTNhZjc5XkEyXkFqcGc@.jpg + order: 14 + imageHasChanged: 
false + - id: nm7720702 + name: Archie Rush + role: Cosmo + thumb: + order: 15 + imageHasChanged: false + - id: nm0179647 + name: Jorge Cordova + role: Agent Cruz + thumb: + https://m.media-amazon.com/images/M/MV5BODBlNTQ4NDYtYTYwZS00MzgyLTk2OTctZGZhOWZk + NDg1NTlhXkEyXkFqcGc@.jpg + order: 16 + imageHasChanged: false + - id: nm5402906 + name: Crystal Clarke + role: Tipi + thumb: + https://m.media-amazon.com/images/M/MV5BMTc0NzcyOGMtY2ZlYS00N2JmLTlkNTYtY2YyZWZk + YjM4YWVkXkEyXkFqcGc@.jpg + order: 17 + imageHasChanged: false + - id: nm1351863 + name: Seun Shote + role: Simon + thumb: + order: 18 + imageHasChanged: false + - id: nm5165542 + name: Gareth Kane + role: PC Wanson + thumb: + order: 19 + imageHasChanged: false + - ... and >5 more streamDetails: <not loaded> files: (N=0) @@ -274,18 +361,13 @@ directors: (N=1) playCount: 0 lastPlayed: <not set or invalid> firstAired: 2019-06-05 -tags: (N<6) - - white grand piano - - watching a music video - - milk bath - - modern house - - high school cafeteria +tags: (N=0) epBookmark: <not set or invalid> certification: TV-MA networks: (N=0) thumbnail: https://m.media-amazon.com/images/M/MV5BMGViNDgzMGYtZDJiOC00ZjRkLWIwYTktNWM4ZDUyZDA1OGE2XkEyXkFqcGc@.jpg -actors: (N>5) - - id: +actors: (N>10) + - id: nm1415323 name: Miley Cyrus role: Ashley O thumb: @@ -293,7 +375,7 @@ actors: (N>5) OA@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm3886028 name: Angourie Rice role: Rachel thumb: @@ -301,7 +383,7 @@ actors: (N>5) MDQ3ZDRkXkEyXkFqcGc@.jpg order: 1 imageHasChanged: false - - id: + - id: nm1956478 name: Madison Davenport role: Jack thumb: @@ -309,7 +391,7 @@ actors: (N>5) YzU4YjgwXkEyXkFqcGc@.jpg order: 2 imageHasChanged: false - - id: + - id: nm1581522 name: Susan Pourfar role: Catherine thumb: @@ -317,7 +399,7 @@ actors: (N>5) YzAyZTk3XkEyXkFqcGc@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0578766 name: Marc Menchaca role: Kevin thumb: @@ -325,7 +407,7 @@ actors: (N>5) MzEyYjlhXkEyXkFqcGc@.jpg order: 4 
imageHasChanged: false - - id: + - id: nm3320341 name: Jerah Milligan role: Busy G thumb: @@ -333,7 +415,7 @@ actors: (N>5) ZDYxMDAwXkEyXkFqcGc@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0829916 name: Daniel Stewart Sherman role: Bear thumb: @@ -341,7 +423,7 @@ actors: (N>5) MGQzNzVkXkEyXkFqcGc@.jpg order: 6 imageHasChanged: false - - id: + - id: nm3029839 name: James III role: Habanero thumb: @@ -349,7 +431,7 @@ actors: (N>5) NWMxYTcyXkEyXkFqcGc@.jpg order: 7 imageHasChanged: false - - id: + - id: nm2322286 name: Nicholas Pauling role: Dr. Munk thumb: @@ -357,7 +439,7 @@ actors: (N>5) MDgxY2RhXkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm7729639 name: Frances Sholto-Douglas role: Carmen thumb: @@ -365,5 +447,65 @@ actors: (N>5) MDE2ZjdlXkEyXkFqcGc@.jpg order: 9 imageHasChanged: false + - id: nm5105990 + name: Greg Kriek + role: Todd + thumb: + https://m.media-amazon.com/images/M/MV5BY2FlZWRkZmEtODhhYS00NWRlLTkzODMtY2NkYjUx + MTRlY2U0XkEyXkFqcGc@.jpg + order: 10 + imageHasChanged: false + - id: nm10745020 + name: Mikkie-Dené Le Roux + role: Ms Walpole + thumb: + order: 11 + imageHasChanged: false + - id: nm10745021 + name: Sive Gubangxa + role: Stage Manager + thumb: + order: 12 + imageHasChanged: false + - id: nm3678893 + name: Martin Munro + role: Caretaker + thumb: + https://m.media-amazon.com/images/M/MV5BZTZlNDc0YWYtYThmNC00ZTFhLWIzNmUtYmFjZTI2 + YmM1OGViXkEyXkFqcGc@.jpg + order: 13 + imageHasChanged: false + - id: nm10745022 + name: Alessa Gironi + role: Leah + thumb: + order: 14 + imageHasChanged: false + - id: nm7832175 + name: Savana Tardieu + role: Magnolia + thumb: + order: 15 + imageHasChanged: false + - id: nm4192157 + name: Jamie Royal + role: Nurse + thumb: + order: 16 + imageHasChanged: false + - id: nm10193440 + name: Edward J. 
Pepperell + role: Cop + thumb: + order: 17 + imageHasChanged: false + - id: nm6651644 + name: Charles Babalola + role: Tusk + thumb: + https://m.media-amazon.com/images/M/MV5BZTFhMmIzM2MtZmFjNS00NmE1LWE3ZjYtOWE4NGFj + YmVjZjcyXkEyXkFqcGc@.jpg + order: 18 + imageHasChanged: false streamDetails: <not loaded> files: (N=0) diff --git a/test/resources/scrapers/imdbtv/Buffy-S01E01-minimal-details.ref.txt b/test/resources/scrapers/imdbtv/Buffy-S01E01-minimal-details.ref.txt index 425adeafa2..0e99df354e 100644 --- a/test/resources/scrapers/imdbtv/Buffy-S01E01-minimal-details.ref.txt +++ b/test/resources/scrapers/imdbtv/Buffy-S01E01-minimal-details.ref.txt @@ -16,8 +16,72 @@ episode: EpisodeNumber=01 displaySeason: SeasonNumber=xx displayEpisode: EpisodeNumber=xx overview: - When teen vampire slayer Buffy tries to start a new life at Sunnydale High, she - discovers that the school sits atop a demonic dimensional portal. + The series premiere begins at Sunnydale High School, where a boy breaks into the + school during the night with a seemingly reluctant girl, promising her mischief + and, therefore, fun. Nervous and on edge, the girl thinks she heard something a + nd fears something is there. The boy calls out but gets no response, during whic + h the girl's face morphs into that of a vampire and bites the boy, revealing to + the viewer that she was the only danger all along. The girl, we will later find + out, is Darla. + +Buffy has a nightmare the morning of her first day of school. He + r mother drives her to the school and encourages her to think positive. Inside t + he building, Principal Flutie tells her she will start with a clean slate. He re + considers that after realizing that Buffy burned down her previous school's gym + because "it was full of vampi... asbestos." + +Buffy exits the office and bumps in + to another student, spilling the contents of her handbag over the floor. Xander + sees that and helps Buffy, introducing himself. 
She leaves without her stake, wh + ich Xander pockets because he called out to her, but she had already walked away + . In history class, Buffy is helped by Cordelia, who afterwards tests her "cooln + ess factor," skipping the written as Buffy had just moved to Sunnydale, Californ + ia from Los Angeles. To Buffy's horror, Cordelia humiliates an awkward Willow at + the water fountain. Inside the library, Mr. Giles places a book titled Vampyr i + n front of Buffy after realizing who she was. A stunned Buffy makes a hasty exit + . + +Buffy, Willow, Jesse McNally and Xander meet during a break and Xander return + s the stake. Buffy claims it is standard self-defense in Los Angeles. Cordelia a + ppears and tells Buffy that gym is canceled "due to the extreme dead guy" in Aur + a's locker. Buffy asks whether there were marks on the body, freaking out Cordel + ia. Buffy forces her way into the locker room, examines the body, and finds the + characteristic puncture wounds of a vampire on the neck. + +Buffy returns to the l + ibrary and confronts Giles, who informs her that he is her Watcher. Buffy refuse + s to accept her calling as a Slayer since it had gotten her kicked out of her pr + evious school and cost her social life. After they leave the library, Xander eme + rges from behind the shelves, having overheard the strange conversation. + +That n + ight, en route to her first visit to The Bronze, the cool hangout in Sunnydale, + Buffy meets a mysterious, handsome stranger, who warns her that she is living on + a Hellmouth that is about to open, and that "the Harvest" is coming. He also gi + ves her a large silver cross. It isn't revealed until the next episode that the + stranger's name is Angel. + +In The Bronze, Buffy meets Willow and encourages her + to seize the moment: "Because tomorrow you might be dead." She finds Giles and t + ells him about Angel. Giles tells her to learn to hone her skills to sense vampi + res anywhere. 
Buffy uses her fashion sense to pick out a vampire in the club and + is alarmed to see Willow leave with him. She loses them and is surprised by Cor + delia, nearly staking her. Cordelia immediately calls her friends to tell them a + bout it. While Buffy looks for Willow, Jesse chats up Darla at The Bronze. Buffy + is stopped by Xander, whom she convinces to help her search for Willow. + +Meanwh + ile, under the streets of Sunnydale, The Master is woken by lesser vampires from + a long sleep to prepare for the Harvest. He sends Luke to fetch young blood. + +W + illow's new acquaintance takes her to a crypt in a cemetery where they are joine + d by Darla and Jesse, whom she has bitten. Buffy and Xander arrive. Buffy kills + Willow's vampire. Xander and Willow help Jesse, who has been weakened, flee. Luk + e takes Darla's place in the fight so she can help catch the kids. Luke throws B + uffy in a stone coffin and is about to move in for the kill. + +TO BE CONTINUED... writers: (N<6) - Matt Kiene - Rob DesHotel @@ -30,18 +94,13 @@ directors: (N<6) playCount: 0 lastPlayed: <not set or invalid> firstAired: 1997-03-10 -tags: (N<6) - - superhero - - waking up from a nightmare - - break in - - first day of school - - skateboarding +tags: (N=0) epBookmark: <not set or invalid> certification: TV-14 networks: (N=0) thumbnail: https://m.media-amazon.com/images/M/MV5BMTU2NDQ1NTI2MF5BMl5BanBnXkFtZTgwNDMzMTM1NjM@.jpg -actors: (N>5) - - id: +actors: (N>20) + - id: nm0001264 name: Sarah Michelle Gellar role: Buffy Summers thumb: @@ -49,7 +108,7 @@ actors: (N>5) Nw@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0107183 name: Nicholas Brendon role: Xander Harris thumb: @@ -57,7 +116,7 @@ actors: (N>5) .jpg order: 1 imageHasChanged: false - - id: + - id: nm0004989 name: Alyson Hannigan role: Willow Rosenberg thumb: @@ -65,7 +124,7 @@ actors: (N>5) Nw@@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0004806 name: Charisma Carpenter role: Cordelia Chase thumb: @@ -73,7 
+132,7 @@ actors: (N>5) ZDQwNmY4XkEyXkFqcGc@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0372117 name: Anthony Head role: Rupert Giles thumb: @@ -81,7 +140,7 @@ actors: (N>5) .jpg order: 4 imageHasChanged: false - - id: + - id: nm0582420 name: Mark Metcalf role: The Master thumb: @@ -89,7 +148,7 @@ actors: (N>5) OA@@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0859921 name: Brian Thompson role: Luke thumb: @@ -97,7 +156,7 @@ actors: (N>5) .jpg order: 6 imageHasChanged: false - - id: + - id: nm0004770 name: David Boreanaz role: Angel thumb: @@ -105,7 +164,7 @@ actors: (N>5) NjYzYmJlXkEyXkFqcGc@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0503624 name: Ken Lerner role: Principal Bob Flutie thumb: @@ -113,7 +172,7 @@ actors: (N>5) MQ@@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0840133 name: Kristine Sutherland role: Joyce Summers thumb: @@ -121,5 +180,78 @@ actors: (N>5) NA@@.jpg order: 9 imageHasChanged: false + - id: nm0004748 + name: Julie Benz + role: Darla + thumb: + https://m.media-amazon.com/images/M/MV5BMmE2ZjA5MWUtMDVmNi00NzgzLWFhMjktODU2NzZi + ODhlYmEzXkEyXkFqcGc@.jpg + order: 10 + imageHasChanged: false + - id: nm0492551 + name: J. 
Patrick Lawlor + role: Thomas + thumb: + https://m.media-amazon.com/images/M/MV5BNmUxYTk2MzYtYmFkNS00ZmQwLTk2YjItMzYyYjM3 + N2RhZDZlXkEyXkFqcGc@.jpg + order: 11 + imageHasChanged: false + - id: nm0050156 + name: Eric Balfour + role: Jesse + thumb: + https://m.media-amazon.com/images/M/MV5BY2Y0YzdkYTctZGFiOC00ZjdhLWIxNmUtMTMxMjQx + NTkxM2RiXkEyXkFqcGc@.jpg + order: 12 + imageHasChanged: false + - id: nm0833856 + name: Natalie Strauss + role: Teacher + thumb: + https://m.media-amazon.com/images/M/MV5BNzY1MjcwODQ5NV5BMl5BanBnXkFtZTcwOTE2MDYy + MQ@@.jpg + order: 13 + imageHasChanged: false + - id: nm0320547 + name: Carmine Giovinazzo + role: Boy + thumb: + https://m.media-amazon.com/images/M/MV5BZTU5OTEzYzQtZGVhMy00MDFkLTlmMzAtNWZmYzI0 + YjBiNDliXkEyXkFqcGc@.jpg + order: 14 + imageHasChanged: false + - id: nm0151207 + name: Amy Chance + role: Girl #1 + thumb: + order: 15 + imageHasChanged: false + - id: nm1812949 + name: Tupelo Jereme + role: Girl #2 + thumb: + order: 16 + imageHasChanged: false + - id: nm0925316 + name: Persia White + role: Aura + thumb: + https://m.media-amazon.com/images/M/MV5BMTc3NjU3ODM1OV5BMl5BanBnXkFtZTYwNTEyMjYz + .jpg + order: 17 + imageHasChanged: false + - id: nm3496968 + name: Tony Delocht + role: Band Member + thumb: + order: 18 + imageHasChanged: false + - id: nm3482811 + name: Ernie Longoria + role: Band Member + thumb: + order: 19 + imageHasChanged: false + - ... 
and <6 more streamDetails: <not loaded> files: (N=0) diff --git a/test/resources/scrapers/imdbtv/Maters-Tall-Tales-tt1384816.ref.txt b/test/resources/scrapers/imdbtv/Maters-Tall-Tales-tt1384816.ref.txt index 006a52de77..83296c4328 100644 --- a/test/resources/scrapers/imdbtv/Maters-Tall-Tales-tt1384816.ref.txt +++ b/test/resources/scrapers/imdbtv/Maters-Tall-Tales-tt1384816.ref.txt @@ -10,8 +10,13 @@ showTitle: originalTitle: Mater's Tall Tales sortTitle: overview: - Cruise into the crazy adventures of Tow Mater as you watch him be a fire truck, - daredevil, import drifter, and even an astronaut. + Rev up your engines for this unforgettable collection of Cars Toons starring Mat + er, the lovable and hilarious tow truck from the hit movie Cars. From the creati + ve minds of Disney and Pixar, come nine highly entertaining Tall Tales involving + bullfights, drag races, rock concerts, monster truck showdowns, and even U.F.O. + s. Join Mater, the heart and soul of Radiator Springs, and all of your favorite + characters from the world of Cars, as they take you on a fun-filled ride that wi + ll have your family roaring with laughter. ratings (N=1) source=imdb | rating=6.8 | votes=5000 | min=0 | max=10 userRating: 0 @@ -24,31 +29,32 @@ genres: (N<6) - Comedy - Family - Adventure -tags: (N<6) +tags: (N>10) - astronaut + - lightning mcqueen character + - apostrophe in title - anthropomorphic vehicle - - space adventure - - anthropomorphic car - - car + - tow truck + - ... 
and >10 more certification: TV-G networks: (N=0) episodeGuideUrl: actors: (N>30) - - id: + - id: nm1249256 name: Larry the Cable Guy - role: Mater / Mator + role: Mater thumb: https://m.media-amazon.com/images/M/MV5BMTY5MjA0MjQ4Ml5BMl5BanBnXkFtZTcwNTIwMTc2 NQ@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm1584992 name: Keith Ferguson role: Lightning McQueen thumb: order: 1 imageHasChanged: false - - id: + - id: nm0172491 name: Lindsey Collins role: Mia thumb: @@ -56,7 +62,7 @@ actors: (N>30) Mw@@.jpg order: 2 imageHasChanged: false - - id: + - id: nm2264184 name: Elissa Knight role: Tia thumb: @@ -64,98 +70,83 @@ actors: (N>30) MQ@@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0778890 name: Bob Scott - role: - Shouting Car / Additional Voice Talent / Crane / Crush Car / Rasta Carian / NASC - A Pitty #1 / Rescue Squad Trooper / Kabuto Ninjas / Military Truck #1 + role: Additional Voice Talent thumb: order: 4 imageHasChanged: false - - id: + - id: nm0348562 name: Jan Rabson - role: - Plane #1 / Additional Voice Talent / Dock Pitty / Dr. 
Feel Bad / Paddy O'Concret - e / NASCA Truck / Van-San / Kabuto Ninjas / Military Truck #2 / Stanley's Oasis - Population + role: Additional Voice Talent thumb: https://m.media-amazon.com/images/M/MV5BOTJhNDRmZWYtNWVhYS00MmNmLWE1MzQtYWZiOTMy NjIzNzU2XkEyXkFqcGc@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0542706 name: Danny Mann - role: - Additional Voice Talent / Sparky / Clyde / Referee Pitty / Wingo / Albert / Stan - ley's Oasis Population + role: Additional Voice Talent thumb: https://m.media-amazon.com/images/M/MV5BNzI3MTUzNmEtYjJiOC00NTkwLWJlMTEtOGEzZmY4 ZTVmZjgwXkEyXkFqcGc@.jpg order: 6 imageHasChanged: false - - id: + - id: nm0194201 name: John Cygan - role: - Green Hawk / Plane #2 / Heavy Metal Delivery Pitty / Stinky / Announcer #1 / Add - itional Voice Talent / Helicopter #1 + role: Additional Voice Talent thumb: https://m.media-amazon.com/images/M/MV5BNGFjOGM4ZGQtNzQzOS00MzQwLTkxZjctM2VmMTQy ZGNjNmE4XkEyXkFqcGc@.jpg order: 7 imageHasChanged: false - - id: - name: Lori Alan - role: - Additional Voice Talent / Blue Hawk / Red Car / Area 51 PA Announcer / Ford Mode - l T at Wedding - thumb: - https://m.media-amazon.com/images/M/MV5BMjk4ODUwZmQtNTJkOS00YzIzLWJlYzItMDhjNjJi - ODFmNDRlXkEyXkFqcGc@.jpg - order: 8 - imageHasChanged: false - - id: + - id: nm0363641 name: Jess Harnell - role: - Additional Voice Talent / Announcer / Announcer #2 / Mission Control Pitty / Boo - st / Broken Down Ford Model T + role: Additional Voice Talent thumb: https://m.media-amazon.com/images/M/MV5BMTU3YTIzYmYtMGYwZi00ZDEyLTg2Y2EtYjk3ZWQx MzBlN2RhXkEyXkFqcGc@.jpg + order: 8 + imageHasChanged: false + - id: nm0015935 + name: Lori Alan + role: Additional Voice Talent + thumb: + https://m.media-amazon.com/images/M/MV5BMjk4ODUwZmQtNTJkOS00YzIzLWJlYzItMDhjNjJi + ODFmNDRlXkEyXkFqcGc@.jpg order: 9 imageHasChanged: false - - id: + - id: nm0569680 name: Mickie McGowan - role: Additional Voice Talent / Female Car / Mama Ship + role: Additional Voice Talent thumb: order: 10 
imageHasChanged: false - - id: + - id: nm0700760 name: Steve Purcell - role: Dex / Clyde's Buddy / The I-Screamer / Pink Car / Tractors + role: Clyde's Buddy thumb: https://m.media-amazon.com/images/M/MV5BNjM3NTBmYWMtYzI1Yi00ZGUzLTg0NjMtNzYxNTAz YWNkOTA0XkEyXkFqcGc@.jpg order: 11 imageHasChanged: false - - id: + - id: nm0812307 name: Peter Sohn - role: Padre / Crush Car / Screaming Car / Ito-San's Modify Crew + role: Crush Car thumb: https://m.media-amazon.com/images/M/MV5BMTk0NzcwMzc1MF5BMl5BanBnXkFtZTcwMTY2Njkx OA@@.jpg order: 12 imageHasChanged: false - - id: + - id: nm0677037 name: Bob Peterson - role: - Bulldozers / Spanish Crowd Cars / Crush Car / Emergency Dispatcher / Announcer / - Kabuto Ninjas + role: Announcer thumb: https://m.media-amazon.com/images/M/MV5BMTU4NTQ4ODI5N15BMl5BanBnXkFtZTYwNzIwODA3 .jpg order: 13 imageHasChanged: false - - id: + - id: nm0702925 name: Guido Quaroni role: Guido thumb: @@ -163,25 +154,23 @@ actors: (N>30) ZTUwOTNmXkEyXkFqcGc@.jpg order: 14 imageHasChanged: false - - id: + - id: nm0251646 name: Paul Eiding - role: - Red Hawk / Drummer Pitty / Dr. Frankenwagon's Monster / Military Truck #3 / Scie - ntist + role: Dr. 
Frankenwagon's Monster thumb: https://m.media-amazon.com/images/M/MV5BMzRjZGRlYWYtMzQxNy00YmZkLWFmZjEtZDlkZjYx YzVjMmI4XkEyXkFqcGc@.jpg order: 15 imageHasChanged: false - - id: + - id: nm0628170 name: Laraine Newman - role: Curious Car / Additional Voice Talent / Nurse GTO / Announcer #2 + role: Additional Voice Talent thumb: https://m.media-amazon.com/images/M/MV5BMTAxOTkwNzgxMDFeQTJeQWpwZ15BbWU4MDg0MTMy MTYx.jpg order: 16 imageHasChanged: false - - id: + - id: nm0137506 name: George Carlin role: Filmore thumb: @@ -189,15 +178,15 @@ actors: (N>30) YzY5ODA2XkEyXkFqcGc@.jpg order: 17 imageHasChanged: false - - id: + - id: nm0662088 name: Bret 'Brook' Parker - role: Spanish Crowd Cars / Crush Car / Rescue Squad Ambulance + role: Crush Car thumb: order: 18 imageHasChanged: false - - id: + - id: nm0911589 name: Jim Ward - role: Additional Voice Talent / NASCA Pitty #2 / Stanley's Oasis Population + role: Additional Voice Talent thumb: https://m.media-amazon.com/images/M/MV5BOTJhNzJkZWEtZjc4Yi00NjlhLTlmMTktMjk4OWNm NTYzZjU4XkEyXkFqcGc@.jpg @@ -217,5 +206,5 @@ backdrops: (N=0) banners: (N=0) hasTune: false extraFanarts: (N=0) -status: +status: Ended dateAdded: <not set or invalid> diff --git a/test/resources/scrapers/imdbtv/Scrubs-tt0285403.ref.txt b/test/resources/scrapers/imdbtv/Scrubs-tt0285403.ref.txt index 4ed8ddf413..330bcbed33 100644 --- a/test/resources/scrapers/imdbtv/Scrubs-tt0285403.ref.txt +++ b/test/resources/scrapers/imdbtv/Scrubs-tt0285403.ref.txt @@ -10,8 +10,10 @@ showTitle: originalTitle: Scrubs sortTitle: overview: - In the unreal world of Sacred Heart Hospital, intern John "J.D." Dorian learns t - he ways of medicine, friendship and life. + Set in the fictional Sacred Heart hospital in California, John "J.D" Dorian make + s his way through the overwhelming world of medicine, with the help of his best + friend, his fellow rookie doctors, and the arrogant, but brilliant attending phy + sician he views as his mentor. 
ratings (N=1) source=imdb | rating=8.4 | votes=200000 | min=0 | max=10 userRating: 0 @@ -21,17 +23,18 @@ runtime: 30min genres: (N<6) - Comedy - Drama -tags: (N<6) +tags: (N>60) + - interracial relationship + - bisexual man character in a non gay themed movie + - 2010s - hospital - - bromance - - california - - male bonding - - 2000s + - dominant woman + - ... and >60 more certification: TV-14 networks: (N=0) episodeGuideUrl: -actors: (N>50) - - id: +actors: (N>200) + - id: nm0265668 name: Donald Faison role: Dr. Christopher Turk thumb: @@ -39,47 +42,47 @@ actors: (N>50) NA@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0001525 name: John C. McGinley - role: Dr. Perry Cox / Mr. Slidell + role: Dr. Perry Cox thumb: https://m.media-amazon.com/images/M/MV5BMTY2ODk4NzQ0NF5BMl5BanBnXkFtZTgwMzk4Mjgz NDE@.jpg order: 1 imageHasChanged: false - - id: + - id: nm0420898 name: Ken Jenkins - role: Dr. Bob Kelso / Dr. Kelso + role: Dr. Bob Kelso thumb: https://m.media-amazon.com/images/M/MV5BNTIwNTQxOTA1NV5BMl5BanBnXkFtZTcwMTY1MDQw NA@@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0103785 name: Zach Braff - role: Dr. John 'J.D.' Dorian / Mrs. Zeebee + role: Dr. John 'J.D.' Dorian thumb: https://m.media-amazon.com/images/M/MV5BMTYyNjIyZGUtMDhlZC00NWYwLTlkNDEtMDU5YzBl NmY2NTQ2XkEyXkFqcGc@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0149950 name: Sarah Chalke - role: Dr. Elliot Reid / Elliot + role: Dr. 
Elliot Reid thumb: https://m.media-amazon.com/images/M/MV5BMjQyNjg4NDI4NV5BMl5BanBnXkFtZTcwOTAzOTUx Mg@@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0283568 name: Neil Flynn - role: Janitor / Wheelchair Guy + role: Janitor thumb: https://m.media-amazon.com/images/M/MV5BMTcxNTYxMzYyNV5BMl5BanBnXkFtZTcwMzA3ODY3 Mg@@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0721332 name: Judy Reyes role: Nurse Carla Espinosa thumb: @@ -87,7 +90,7 @@ actors: (N>50) MQ@@.jpg order: 6 imageHasChanged: false - - id: + - id: nm0556165 name: Robert Maschio role: Todd Quinlan thumb: @@ -95,23 +98,23 @@ actors: (N>50) MQ@@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0516127 name: Sam Lloyd - role: Ted Buckland / Lawyer + role: Ted Buckland thumb: https://m.media-amazon.com/images/M/MV5BMzA3OWU0YjYtNWJhMS00NWI1LWJjOTAtOGQ5OWY0 YTZmNDg3XkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0942193 name: Aloma Wright - role: Nurse Roberts / Nurse Shirley / Nurse Laverne Roberts + role: Nurse Roberts thumb: https://m.media-amazon.com/images/M/MV5BMTMzODcwNTUwOV5BMl5BanBnXkFtZTYwNjM2MDM4 .jpg order: 9 imageHasChanged: false - - id: + - id: nm0588096 name: Christa Miller role: Jordan Sullivan thumb: @@ -119,7 +122,7 @@ actors: (N>50) .jpg order: 10 imageHasChanged: false - - id: + - id: nm1056923 name: Johnny Kastl role: Doug thumb: @@ -127,7 +130,7 @@ actors: (N>50) MQ@@.jpg order: 11 imageHasChanged: false - - id: + - id: nm0005402 name: Travis Schuldt role: Keith thumb: @@ -135,21 +138,21 @@ actors: (N>50) MDA5ZDZhXkEyXkFqcGc@.jpg order: 12 imageHasChanged: false - - id: + - id: nm2255288 name: Eliza Coupe - role: Dr. Denise Mahoney / Denise + role: Dr. Denise Mahoney thumb: https://m.media-amazon.com/images/M/MV5BMjQ1MDg4MDYzMF5BMl5BanBnXkFtZTgwODUxNDg3 MzI@.jpg order: 13 imageHasChanged: false - - id: + - id: nm0256727 name: Frank Encarnacao role: Dr. 
Mickhead thumb: order: 14 imageHasChanged: false - - id: + - id: nm0161130 name: Charles Rahi Chun role: Dr. Wen thumb: @@ -157,15 +160,15 @@ actors: (N>50) MQ@@.jpg order: 15 imageHasChanged: false - - id: + - id: nm0777372 name: Mike Schwartz - role: Lloyd / Delivery Man / Patient / Delivery Guy + role: Lloyd thumb: https://m.media-amazon.com/images/M/MV5BMjI4NTczOTc1MV5BMl5BanBnXkFtZTgwNDIzMzcz MjE@.jpg order: 16 imageHasChanged: false - - id: + - id: nm0006969 name: Elizabeth Banks role: Dr. Kim Briggs thumb: @@ -173,21 +176,21 @@ actors: (N>50) Y2ZkNDA4XkEyXkFqcGc@.jpg order: 17 imageHasChanged: false - - id: + - id: nm1503117 name: Michael Hobert - role: Lonnie / Pizza Guy + role: Lonnie thumb: https://m.media-amazon.com/images/M/MV5BMTQ3MTU2ODI2OF5BMl5BanBnXkFtZTgwNTIyMTgz MjE@.jpg order: 18 imageHasChanged: false - - id: + - id: nm2466341 name: Andrew Miller role: Baby Jack thumb: order: 19 imageHasChanged: false - - ... and >30 more + - ... and >200 more posters: (N=1) - id: originalUrl: https://m.media-amazon.com/images/M/MV5BMWQ3ZmEwYmUtYWRhMS00OTllLTlkMzgtM2Q3MWFjOTI4MDE5XkEyXkFqcGc@.jpg @@ -201,5 +204,5 @@ backdrops: (N=0) banners: (N=0) hasTune: false extraFanarts: (N=0) -status: +status: Ended dateAdded: <not set or invalid> diff --git a/test/resources/scrapers/imdbtv/Sherlock-tt0285403.ref.txt b/test/resources/scrapers/imdbtv/Sherlock-tt0285403.ref.txt index 2d9f874b0a..1b6bfc5d09 100644 --- a/test/resources/scrapers/imdbtv/Sherlock-tt0285403.ref.txt +++ b/test/resources/scrapers/imdbtv/Sherlock-tt0285403.ref.txt @@ -10,13 +10,61 @@ showTitle: originalTitle: Sherlock sortTitle: overview: - In modern-day London, brilliant but eccentric detective Sherlock Holmes teams wi - th war veteran Dr. John Watson to crack baffling crimes, outwit formidable foes, - and aid Scotland Yard with razor-sharp deduction. + Series 1 + +Wounded Afghan veteran Dr. 
John Watson (Martin Freeman) meets brillian + t consulting detective Sherlock Holmes (Benedict Cumberbatch) and moves into a f + lat on 221B Baker Street with him. Their landlady, Mrs. Hudson (Una Stubbs), act + s as a de facto housekeeper for them. Sherlock's first case, "A Study in Pink", + sees him apprehend cabbi-turned-serial killer Jeff Hope (Phil Davis). Their conf + rontation escalates to the point that John, who's followed Sherlock unnoticed to + the scene, shoots the cabbie out of fear for Sherlock's life. However, Sherlock + manages to extract from the dying cabbie that he was being sponsored in his cri + mes by a figure called Moriarty, who is apparently a "fan" of Sherlock's. + +The c + ase seals Sherlock and John's friendship. In their next case, "The Blind Banker" + , they uncover a Chinese smuggling ring, which is again implied to have been lin + ked to Moriarty. + +"The Great Game" sees Sherlock being baited into solving a ser + ies of "puzzles" by Moriarty, who is as yet unseen. Each puzzle involves solving + an undetected crime, with the additional incentive being that failure to solve + the crime within a set time will lead to the death of an innocent bystander. Sim + ultaneously, Sherlock works on recovering the missing Bruce-Partington Plans for + his older brother Mycroft (Mark Gatiss), who is a powerful figure in the Britis + h government. The series ends with Sherlock encountering Moriarty (Andrew Scott) + by a swimming pool, where Moriarty warns Sherlock that he will destroy him. + +Se + ries 2 + +Moriarty's pool-side encounter with Sherlock ends abruptly when Moriarty + receives a cellphone call from dominatrix Irene Adler (Lara Pulver). In "A Scan + dal in Belgravia", Sherlock is assigned by Buckingham Palace to recover some dam + aging photographs in Irene's camera phone. Sherlock meets Irene but is outwitted + and fails to retrieve her phone. She becomes "the Woman" in his mind. 
+ +Months + later, Irene meets Sherlock once more and tricks him into decoding a message for + her, which turns out to be a matter of national security. Irene passes on the m + essage to Moriarty and then blackmails Mycroft into granting her a list of deman + ds or face Sherlock's exposure as a security leak. However, at the last moment, + Sherlock figures out the password to her camera phone and crushes her bargaining + power. The defeated Irene fakes her death once more with Sherlock's help to esc + ape the wrath of the British government and enter a life of anonymity. + +Sherloc + k and John travel to Dartmoor in "The Hounds of Baskerville" to solve the myster + y of a gigantic hound that apparently caused the death of client Henry Knight's + father years ago. The "Hound" turns out to be a hallucinogenic drug that was bei + ng secretly developed as a chemical weapon at the nearby military base of Basker + ville. The man responsible, Dr. Frank Mortimer, is killed in a land mine explosi + on. ratings (N=1) source=imdb | rating=9 | votes=1000000 | min=0 | max=10 userRating: 0 -imdbTop250: 23 +imdbTop250: -1 firstAired: 2010-10-24 runtime: 90min genres: (N<6) @@ -24,17 +72,18 @@ genres: (N<6) - Thriller - Mystery - Drama -tags: (N<6) - - genius - - murder - - modernized setting - - detective - - friendship +tags: (N>70) + - friends who live together + - inspector + - dark + - asexual protagonist + - police + - ... and >60 more certification: TV-14 networks: (N=0) episodeGuideUrl: -actors: (N>50) - - id: +actors: (N>200) + - id: nm1212722 name: Benedict Cumberbatch role: Sherlock Holmes thumb: @@ -42,7 +91,7 @@ actors: (N>50) MzE@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0293509 name: Martin Freeman role: Dr. John Watson thumb: @@ -50,7 +99,7 @@ actors: (N>50) Nw@@.jpg order: 1 imageHasChanged: false - - id: + - id: nm0835939 name: Una Stubbs role: Mrs. 
Hudson thumb: @@ -58,23 +107,23 @@ actors: (N>50) OA@@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0001291 name: Rupert Graves - role: DI Lestrade / DI Greg Lestrade / Inspector Lestrade + role: DI Lestrade thumb: https://m.media-amazon.com/images/M/MV5BOTc0M2U5NDItODVlNS00M2JjLWIxNzYtMGYwOTI1 MzIyNzliXkEyXkFqcGc@.jpg order: 3 imageHasChanged: false - - id: + - id: nm1154764 name: Louise Brealey - role: Molly Hooper / Hooper + role: Molly Hooper thumb: https://m.media-amazon.com/images/M/MV5BZjhjNjQxMWYtZWIyZS00ZTA2LThjMDYtMTMyMzc4 ODU1ZTU2XkEyXkFqcGc@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0309693 name: Mark Gatiss role: Mycroft Holmes thumb: @@ -82,23 +131,23 @@ actors: (N>50) OQ@@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0778831 name: Andrew Scott - role: Jim Moriarty / Professor Moriarty + role: Jim Moriarty thumb: https://m.media-amazon.com/images/M/MV5BMTQ5MjI2NTc1Ml5BMl5BanBnXkFtZTcwMzM5NjY4 Nw@@.jpg order: 6 imageHasChanged: false - - id: + - id: nm0007893 name: Amanda Abbington - role: Mary Morstan / Mary Watson + role: Mary Morstan thumb: https://m.media-amazon.com/images/M/MV5BODUwOTQ1OTY2Nl5BMl5BanBnXkFtZTcwMjgzMDgx OA@@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0034877 name: Jonathan Aris role: Anderson thumb: @@ -106,7 +155,7 @@ actors: (N>50) OTA4NDA2XkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0733172 name: Vinette Robinson role: Sgt Sally Donovan thumb: @@ -114,7 +163,7 @@ actors: (N>50) N2IwZmE0XkEyXkFqcGc@.jpg order: 9 imageHasChanged: false - - id: + - id: nm0600439 name: Tanya Moodie role: Ella thumb: @@ -122,7 +171,7 @@ actors: (N>50) NDlkMWUzXkEyXkFqcGc@.jpg order: 10 imageHasChanged: false - - id: + - id: nm0242026 name: Lindsay Duncan role: Lady Smallwood thumb: @@ -130,31 +179,31 @@ actors: (N>50) OA@@.jpg order: 11 imageHasChanged: false - - id: + - id: nm2055297 name: Yasmine Akram - role: Janine / Janine Donlevy + role: Janine thumb: 
https://m.media-amazon.com/images/M/MV5BOGI4OTNmNTctMDVhOC00ZDY0LTgxMDgtODg5MmJi YjhkNWJiXkEyXkFqcGc@.jpg order: 12 imageHasChanged: false - - id: + - id: nm1604939 name: Sian Brooke - role: Elizabeth / Eurus Holmes + role: Elizabeth thumb: https://m.media-amazon.com/images/M/MV5BOWIwYTQwMDktNzBhMS00NDI2LTk5MzItNTE0YTU4 ZjcwNGMyXkEyXkFqcGc@.jpg order: 13 imageHasChanged: false - - id: + - id: nm0625085 name: David Nellist - role: Mike Stamford / Stamford + role: Mike Stamford thumb: https://m.media-amazon.com/images/M/MV5BMjEwODQ5NTkwNF5BMl5BanBnXkFtZTgwMTM5NjE1 NTE@.jpg order: 14 imageHasChanged: false - - id: + - id: nm0475336 name: Simon Kunz role: Sir Edwin thumb: @@ -162,37 +211,37 @@ actors: (N>50) ZWZkZTk1XkEyXkFqcGc@.jpg order: 15 imageHasChanged: false - - id: - name: Timothy Carlton - role: Sherlock's Father / Dad / Mr. Holmes + - id: nm0893243 + name: Wanda Ventham + role: Mrs. Holmes thumb: - https://m.media-amazon.com/images/M/MV5BMjAwNDQ1OTEwOV5BMl5BanBnXkFtZTcwOTY0NTgx + https://m.media-amazon.com/images/M/MV5BMzQ1MDYxNjQ0OF5BMl5BanBnXkFtZTcwMTcwODgx OA@@.jpg order: 16 imageHasChanged: false - - id: - name: Wanda Ventham - role: Sherlock's Mother / Mum / Mrs. Holmes + - id: nm0138232 + name: Timothy Carlton + role: Dad thumb: - https://m.media-amazon.com/images/M/MV5BMzQ1MDYxNjQ0OF5BMl5BanBnXkFtZTcwMTcwODgx + https://m.media-amazon.com/images/M/MV5BMjAwNDQ1OTEwOV5BMl5BanBnXkFtZTcwOTY0NTgx OA@@.jpg order: 17 imageHasChanged: false - - id: - name: Lara Pulver - role: Irene Adler - thumb: - https://m.media-amazon.com/images/M/MV5BNGY0ZTZhMTAtZmMxYi00ZjRmLTllYTItMjQ1OWFi - NzNmY2NjXkEyXkFqcGc@.jpg - order: 18 - imageHasChanged: false - - id: + - id: nm1933893 name: Lisa McAllister role: Anthea thumb: + order: 18 + imageHasChanged: false + - id: nm0854364 + name: Zoe Telford + role: Sarah + thumb: + https://m.media-amazon.com/images/M/MV5BMzk5NWM5OTktNTkxNi00ZDZjLWEwYzktYTE4N2Zi + MWYwMmM5XkEyXkFqcGc@.jpg order: 19 imageHasChanged: false - - ... 
and >30 more + - ... and >200 more posters: (N=1) - id: originalUrl: https://m.media-amazon.com/images/M/MV5BNTQzNGZjNDEtOTMwYi00MzFjLWE2ZTYtYzYxYzMwMjZkZDc5XkEyXkFqcGc@.jpg @@ -206,5 +255,5 @@ backdrops: (N=0) banners: (N=0) hasTune: false extraFanarts: (N=0) -status: +status: Ended dateAdded: <not set or invalid> diff --git a/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-minimal-details.ref.txt b/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-minimal-details.ref.txt index d7e065dd33..d24c705718 100644 --- a/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-minimal-details.ref.txt +++ b/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-minimal-details.ref.txt @@ -16,8 +16,14 @@ episode: EpisodeNumber=19 displaySeason: SeasonNumber=xx displayEpisode: EpisodeNumber=xx overview: - With Homer's help, Flanders tries to establish a Christian theme park based on d - rawings in Maude's sketch book. + Ned opens a theme park to the memory of his late wife Maude and it becomes a hug + e success when people kneeling in front of a statue of Maude experience mystic v + isions. The reason for this is that a grille in front of the statue is an out-pi + pe for a propane gas line and they are getting high on the gas. Unfortunately th + e park is closed down when Homer and Ned try to stop two children from lighting + a candle before the altar and are charged with assault.Ned does,however,enjoy th + e further company of Rachel Jordan,despite his efforts to turn her into a clone + of Maude. writers: (N<6) - Julie Thacker-Scully - Joel H. 
Cohen @@ -30,28 +36,21 @@ directors: (N<6) playCount: 0 lastPlayed: <not set or invalid> firstAired: 2001-05-06 -tags: (N<6) - - tv show theme song - - lazy man - - cameo - - lazy employee - - american cult tv +tags: (N=0) epBookmark: <not set or invalid> certification: TV-14 networks: (N=0) thumbnail: https://m.media-amazon.com/images/M/MV5BMzc1Njk2YzItNDZhZC00NzI1LWFhMzMtMTQ5Y2I2NDNjYzc2XkEyXkFqcGc@.jpg -actors: (N>5) - - id: +actors: (N>10) + - id: nm0144657 name: Dan Castellaneta - role: - Homer Simpson / Rich Texan / Krusty / Willie / Mayor Quimby / Sideshow Mel / Joh - n Travolta + role: Homer Simpson thumb: https://m.media-amazon.com/images/M/MV5BMjAyNDY5NDEwOV5BMl5BanBnXkFtZTcwMjY0ODYy MQ@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0001413 name: Julie Kavner role: Marge Simpson thumb: @@ -59,15 +58,15 @@ actors: (N>5) .jpg order: 1 imageHasChanged: false - - id: + - id: nm0004813 name: Nancy Cartwright - role: Bart Simpson / Nelson Muntz / Todd Flanders / Ralph Wiggum + role: Bart Simpson thumb: https://m.media-amazon.com/images/M/MV5BMjZmYTQ1YjItZTUzYi00Nzk1LTlmNzEtMmRmMjM4 NjZmOTcyXkEyXkFqcGc@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0810379 name: Yeardley Smith role: Lisa Simpson thumb: @@ -75,27 +74,23 @@ actors: (N>5) MTdkODZmXkEyXkFqcGc@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0000279 name: Hank Azaria - role: - Moe Szyslak / Frink / Cletus / Chief Wiggum / Carl / Dr. Nick Riviera / Apu / Ch - almers / Comic Book Guy / Kirk Van Houten / Captain McAllister / Disco Stu + role: Moe Szyslak thumb: https://m.media-amazon.com/images/M/MV5BMTQ0MzIxMzUwMV5BMl5BanBnXkFtZTcwMjE1NjM1 Mg@@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0733427 name: Harry Shearer - role: - Ned Flanders / Reverend Lovejoy / Dr. Hibbert / Kent Brockman / King David / Len - ny / Louie / Skinner / St. 
Peter + role: Ned Flanders thumb: https://m.media-amazon.com/images/M/MV5BMTIxNDg4MzA5OF5BMl5BanBnXkFtZTYwOTM3MDc1 .jpg order: 5 imageHasChanged: false - - id: + - id: nm0173448 name: Shawn Colvin role: Rachel Jordan thumb: @@ -103,7 +98,7 @@ actors: (N>5) .jpg order: 6 imageHasChanged: false - - id: + - id: nm0908761 name: Marcia Wallace role: Edna Krabappel thumb: @@ -111,7 +106,7 @@ actors: (N>5) MDE@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0594217 name: Marcia Mitzman Gaven role: Helen Lovejoy thumb: @@ -119,13 +114,27 @@ actors: (N>5) OWY5MDY0XkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0370788 name: Pamela Hayden - role: Milhouse Van Houten / Rod Flanders / Patches + role: Milhouse Van Houten thumb: https://m.media-amazon.com/images/M/MV5BMTY1NjU1ODI1Ml5BMl5BanBnXkFtZTYwNDE2ODQz .jpg order: 9 imageHasChanged: false + - id: nm0534134 + name: Tress MacNeille + role: Agnes Skinner + thumb: + https://m.media-amazon.com/images/M/MV5BMTQxMTQ1NzY5N15BMl5BanBnXkFtZTcwMDY5NDUw + OA@@.jpg + order: 10 + imageHasChanged: false + - id: nm0927293 + name: Karl Wiedergott + role: + thumb: + order: 11 + imageHasChanged: false streamDetails: <not loaded> files: (N=0) diff --git a/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-tt0701133-all-details.ref.txt b/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-tt0701133-all-details.ref.txt index 35dbfbe49e..aa8ad12a81 100644 --- a/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-tt0701133-all-details.ref.txt +++ b/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-tt0701133-all-details.ref.txt @@ -16,8 +16,14 @@ episode: EpisodeNumber=xx displaySeason: SeasonNumber=xx displayEpisode: EpisodeNumber=xx overview: - With Homer's help, Flanders tries to establish a Christian theme park based on d - rawings in Maude's sketch book. 
+ Ned opens a theme park to the memory of his late wife Maude and it becomes a hug + e success when people kneeling in front of a statue of Maude experience mystic v + isions. The reason for this is that a grille in front of the statue is an out-pi + pe for a propane gas line and they are getting high on the gas. Unfortunately th + e park is closed down when Homer and Ned try to stop two children from lighting + a candle before the altar and are charged with assault.Ned does,however,enjoy th + e further company of Rachel Jordan,despite his efforts to turn her into a clone + of Maude. writers: (N<6) - Julie Thacker-Scully - Joel H. Cohen @@ -30,28 +36,21 @@ directors: (N<6) playCount: 0 lastPlayed: <not set or invalid> firstAired: 2001-05-06 -tags: (N<6) - - tv show theme song - - lazy man - - cameo - - lazy employee - - american cult tv +tags: (N=0) epBookmark: <not set or invalid> certification: TV-14 networks: (N=0) thumbnail: https://m.media-amazon.com/images/M/MV5BMzc1Njk2YzItNDZhZC00NzI1LWFhMzMtMTQ5Y2I2NDNjYzc2XkEyXkFqcGc@.jpg -actors: (N>5) - - id: +actors: (N>10) + - id: nm0144657 name: Dan Castellaneta - role: - Homer Simpson / Rich Texan / Krusty / Willie / Mayor Quimby / Sideshow Mel / Joh - n Travolta + role: Homer Simpson thumb: https://m.media-amazon.com/images/M/MV5BMjAyNDY5NDEwOV5BMl5BanBnXkFtZTcwMjY0ODYy MQ@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0001413 name: Julie Kavner role: Marge Simpson thumb: @@ -59,15 +58,15 @@ actors: (N>5) .jpg order: 1 imageHasChanged: false - - id: + - id: nm0004813 name: Nancy Cartwright - role: Bart Simpson / Nelson Muntz / Todd Flanders / Ralph Wiggum + role: Bart Simpson thumb: https://m.media-amazon.com/images/M/MV5BMjZmYTQ1YjItZTUzYi00Nzk1LTlmNzEtMmRmMjM4 NjZmOTcyXkEyXkFqcGc@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0810379 name: Yeardley Smith role: Lisa Simpson thumb: @@ -75,27 +74,23 @@ actors: (N>5) MTdkODZmXkEyXkFqcGc@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0000279 
name: Hank Azaria - role: - Moe Szyslak / Frink / Cletus / Chief Wiggum / Carl / Dr. Nick Riviera / Apu / Ch - almers / Comic Book Guy / Kirk Van Houten / Captain McAllister / Disco Stu + role: Moe Szyslak thumb: https://m.media-amazon.com/images/M/MV5BMTQ0MzIxMzUwMV5BMl5BanBnXkFtZTcwMjE1NjM1 Mg@@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0733427 name: Harry Shearer - role: - Ned Flanders / Reverend Lovejoy / Dr. Hibbert / Kent Brockman / King David / Len - ny / Louie / Skinner / St. Peter + role: Ned Flanders thumb: https://m.media-amazon.com/images/M/MV5BMTIxNDg4MzA5OF5BMl5BanBnXkFtZTYwOTM3MDc1 .jpg order: 5 imageHasChanged: false - - id: + - id: nm0173448 name: Shawn Colvin role: Rachel Jordan thumb: @@ -103,7 +98,7 @@ actors: (N>5) .jpg order: 6 imageHasChanged: false - - id: + - id: nm0908761 name: Marcia Wallace role: Edna Krabappel thumb: @@ -111,7 +106,7 @@ actors: (N>5) MDE@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0594217 name: Marcia Mitzman Gaven role: Helen Lovejoy thumb: @@ -119,13 +114,27 @@ actors: (N>5) OWY5MDY0XkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0370788 name: Pamela Hayden - role: Milhouse Van Houten / Rod Flanders / Patches + role: Milhouse Van Houten thumb: https://m.media-amazon.com/images/M/MV5BMTY1NjU1ODI1Ml5BMl5BanBnXkFtZTYwNDE2ODQz .jpg order: 9 imageHasChanged: false + - id: nm0534134 + name: Tress MacNeille + role: Agnes Skinner + thumb: + https://m.media-amazon.com/images/M/MV5BMTQxMTQ1NzY5N15BMl5BanBnXkFtZTcwMDY5NDUw + OA@@.jpg + order: 10 + imageHasChanged: false + - id: nm0927293 + name: Karl Wiedergott + role: + thumb: + order: 11 + imageHasChanged: false streamDetails: <not loaded> files: (N=0) diff --git a/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-tt0701133-minimal-details.ref.txt b/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-tt0701133-minimal-details.ref.txt index 35dbfbe49e..aa8ad12a81 100644 --- 
a/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-tt0701133-minimal-details.ref.txt +++ b/test/resources/scrapers/imdbtv/The-Simpsons-S12E19-tt0701133-minimal-details.ref.txt @@ -16,8 +16,14 @@ episode: EpisodeNumber=xx displaySeason: SeasonNumber=xx displayEpisode: EpisodeNumber=xx overview: - With Homer's help, Flanders tries to establish a Christian theme park based on d - rawings in Maude's sketch book. + Ned opens a theme park to the memory of his late wife Maude and it becomes a hug + e success when people kneeling in front of a statue of Maude experience mystic v + isions. The reason for this is that a grille in front of the statue is an out-pi + pe for a propane gas line and they are getting high on the gas. Unfortunately th + e park is closed down when Homer and Ned try to stop two children from lighting + a candle before the altar and are charged with assault.Ned does,however,enjoy th + e further company of Rachel Jordan,despite his efforts to turn her into a clone + of Maude. writers: (N<6) - Julie Thacker-Scully - Joel H. 
Cohen @@ -30,28 +36,21 @@ directors: (N<6) playCount: 0 lastPlayed: <not set or invalid> firstAired: 2001-05-06 -tags: (N<6) - - tv show theme song - - lazy man - - cameo - - lazy employee - - american cult tv +tags: (N=0) epBookmark: <not set or invalid> certification: TV-14 networks: (N=0) thumbnail: https://m.media-amazon.com/images/M/MV5BMzc1Njk2YzItNDZhZC00NzI1LWFhMzMtMTQ5Y2I2NDNjYzc2XkEyXkFqcGc@.jpg -actors: (N>5) - - id: +actors: (N>10) + - id: nm0144657 name: Dan Castellaneta - role: - Homer Simpson / Rich Texan / Krusty / Willie / Mayor Quimby / Sideshow Mel / Joh - n Travolta + role: Homer Simpson thumb: https://m.media-amazon.com/images/M/MV5BMjAyNDY5NDEwOV5BMl5BanBnXkFtZTcwMjY0ODYy MQ@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0001413 name: Julie Kavner role: Marge Simpson thumb: @@ -59,15 +58,15 @@ actors: (N>5) .jpg order: 1 imageHasChanged: false - - id: + - id: nm0004813 name: Nancy Cartwright - role: Bart Simpson / Nelson Muntz / Todd Flanders / Ralph Wiggum + role: Bart Simpson thumb: https://m.media-amazon.com/images/M/MV5BMjZmYTQ1YjItZTUzYi00Nzk1LTlmNzEtMmRmMjM4 NjZmOTcyXkEyXkFqcGc@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0810379 name: Yeardley Smith role: Lisa Simpson thumb: @@ -75,27 +74,23 @@ actors: (N>5) MTdkODZmXkEyXkFqcGc@.jpg order: 3 imageHasChanged: false - - id: + - id: nm0000279 name: Hank Azaria - role: - Moe Szyslak / Frink / Cletus / Chief Wiggum / Carl / Dr. Nick Riviera / Apu / Ch - almers / Comic Book Guy / Kirk Van Houten / Captain McAllister / Disco Stu + role: Moe Szyslak thumb: https://m.media-amazon.com/images/M/MV5BMTQ0MzIxMzUwMV5BMl5BanBnXkFtZTcwMjE1NjM1 Mg@@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0733427 name: Harry Shearer - role: - Ned Flanders / Reverend Lovejoy / Dr. Hibbert / Kent Brockman / King David / Len - ny / Louie / Skinner / St. 
Peter + role: Ned Flanders thumb: https://m.media-amazon.com/images/M/MV5BMTIxNDg4MzA5OF5BMl5BanBnXkFtZTYwOTM3MDc1 .jpg order: 5 imageHasChanged: false - - id: + - id: nm0173448 name: Shawn Colvin role: Rachel Jordan thumb: @@ -103,7 +98,7 @@ actors: (N>5) .jpg order: 6 imageHasChanged: false - - id: + - id: nm0908761 name: Marcia Wallace role: Edna Krabappel thumb: @@ -111,7 +106,7 @@ actors: (N>5) MDE@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0594217 name: Marcia Mitzman Gaven role: Helen Lovejoy thumb: @@ -119,13 +114,27 @@ actors: (N>5) OWY5MDY0XkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0370788 name: Pamela Hayden - role: Milhouse Van Houten / Rod Flanders / Patches + role: Milhouse Van Houten thumb: https://m.media-amazon.com/images/M/MV5BMTY1NjU1ODI1Ml5BMl5BanBnXkFtZTYwNDE2ODQz .jpg order: 9 imageHasChanged: false + - id: nm0534134 + name: Tress MacNeille + role: Agnes Skinner + thumb: + https://m.media-amazon.com/images/M/MV5BMTQxMTQ1NzY5N15BMl5BanBnXkFtZTcwMDY5NDUw + OA@@.jpg + order: 10 + imageHasChanged: false + - id: nm0927293 + name: Karl Wiedergott + role: + thumb: + order: 11 + imageHasChanged: false streamDetails: <not loaded> files: (N=0) diff --git a/test/resources/scrapers/imdbtv/The-Simpsons-tt0096697-minimal-details.ref.txt b/test/resources/scrapers/imdbtv/The-Simpsons-tt0096697-minimal-details.ref.txt index 7f6ef27460..19d108e059 100644 --- a/test/resources/scrapers/imdbtv/The-Simpsons-tt0096697-minimal-details.ref.txt +++ b/test/resources/scrapers/imdbtv/The-Simpsons-tt0096697-minimal-details.ref.txt @@ -10,443 +10,217 @@ showTitle: originalTitle: The Simpsons sortTitle: overview: - The satiric half-hour adventures of a working-class family in the misfit city of - Springfield. + 'The Simpsons' are a dysfunctional family that have a tendency to go on wacky ad + ventures. The family consists of five members, Homer, Marge, Bart, Lisa and Magg + ie. 
+ +Homer: husband of Marge and father of Bart, Lisa and Maggie. He is a safety + supervisor at the nuclear power plant and loves his family, even though he can + be a bit aggressive at times, like strangling Bart. Homer also loves doughnuts a + nd food in general. He can be a bit lazy at times. + +Marge: wife of Homer and mot + her of Bart, Lisa and Maggie. Marge is frequently seen doing jobs around the hou + se and sometimes feels a bit left out by her family. Sometimes she gets mad at H + omer and makes him sleep on the couch or even kicks him out the house until he r + ealizes what he's done. Marge does appear to have a connection to all her kids. + + +Bart: At 10 years of age, Bart is Homer and Marges first born child. He is ofte + n considered mischievous, but has also portrayed signs of really caring about pe + ople round Springfield. Bart also loves to prank and hang out with his best frie + nd, Milhouse. + +Lisa: At 8 years of age, Lisa is the first born Daughter and midd + le child of the family. She is the smartest member of the family and is talented + , as she is frequently seen playing her saxophone, she has also been seen playin + g the guitar. Lisa tends to differ from her family, as she is a vegetarian and a + Buddhist. + +Maggie: As the baby, Maggie is the second born Daughter and youngest + member of the Simpson family, she rarely talks in the series, but has still sho + wn signs of intelligence and has even been seen changing her own diaper. Despite + being sweet most of the time, Maggie has shown signs of being a bit aggressive + with weapons. 
ratings (N=1) source=imdb | rating=8.6 | votes=400000 | min=0 | max=10 userRating: 0 -imdbTop250: 113 +imdbTop250: -1 firstAired: 1989-12-17 runtime: 22min genres: (N<6) - Animation - Comedy -tags: (N<6) - - cult tv - - satire - - sitcom - - beer - - family as protagonists -certification: TV-MA +tags: (N>100) + - aunt niece relationship + - spoof + - suburb + - neighbor neighbor relationship + - schoolgirl + - ... and >90 more +certification: TV-14 networks: (N=0) episodeGuideUrl: -actors: (N>50) - - id: +actors: (N>200) + - id: nm0144657 name: Dan Castellaneta - role: - Homer Simpson / Grampa Simpson / Barney Gumble / Groundskeeper Willie / Sideshow - Mel / Krusty the Clown / Mayor Quimby / Hans Moleman / Squeaky-Voiced Teen / Sa - nta's Little Helper / Grampa / Krusty / Itchy / Rich Texan / Krusty the Klown / - Squeaky Voiced Teen / Kodos / Gil Gunderson / Gil / Louie / Abraham Simpson / Ba - rney / Mr. Teeny / Blue-Haired Lawyer / Snowball II / Scott Christian / Marty / - Bill / Arnie Pie / Arnie Pye / Blue Haired Lawyer / Reporter / Count Dracula / B - enjamin / Bill Clinton / Charlie / Frankie the Squealer / Abe Simpson / Addition - al Voices / Hot Dog Vendor / Leprechaun / Sam / Willie / Worker / Butler / Burns - ' Lawyer / The Yes Guy / Monkey / Quimby / Mailman / Bill Cosby / Abraham 'Gramp - a' Simpson / Woody Allen / Capt. Lance Murdock / Audience Member / George Washin - gton / Announcer / Man in Audience / John Travolta / Moleman / Player / Homer's - Brain / Cop / Doug / Monkeys / Man #2 / Crowd Members / Reporter #1 / Man in Cro - wd / Guard / Reporter #2 / Soldier / Poochie / Squeaky-voiced teen / St. 
Peter / - Smitty / Wolves / Jake the Barber / Reporter #4 / Reporter #5 / Heckler #1 / Be - er Vendor / Sailor / Stuart / Man on Street / Man in Crowd #2 / Benjamin Frankli - n / Abraham Lincoln / Ice Cream Man / TV Voice / Conductor / Gomer Pyle / French - man / News Director / Uncle Sam / Troy McClure / Smartline Announcer / Writer / - Arthur Miller / Waiter + role: Homer Simpson thumb: https://m.media-amazon.com/images/M/MV5BMjAyNDY5NDEwOV5BMl5BanBnXkFtZTcwMjY0ODYy MQ@@.jpg order: 0 imageHasChanged: false - - id: + - id: nm0001413 name: Julie Kavner - role: - Marge Simpson / Patty Bouvier / Selma Bouvier / Jacqueline Bouvier / Others / Ja - ckie Bouvier / Eunice Bouvier / Bowler #2 / Receptionist / Actress as Marge / Nu - rse / Teacher / Marge's Grandmother / Queen Elizabeth I / Elizabeth Raleigh / Ca - rtoon Squirrel / Margarine of aragon / Angela Lansburry / Herb's Mother / Aunt G - ladys / Marge Bufflekill / Mary / Pagan Mother / Audience / Mabel Simpson / Adil - 's Mother / Floor Buffer / Whispering Teachers / Gloria / Spirit #3 / Zombie DJ - #1 / Marge Bar / Genevieve Bouvier / Marge robots / Majora / Frida Kahlo / Patty - / Selma / Turkey / Marge Albertson / The Pookadook + role: Marge Simpson thumb: https://m.media-amazon.com/images/M/MV5BMTcwMjUzMTc1OF5BMl5BanBnXkFtZTYwODE0MjM0 .jpg order: 1 imageHasChanged: false - - id: + - id: nm0004813 name: Nancy Cartwright - role: - Bart Simpson / Nelson Muntz / Ralph Wiggum / Kearney / Todd Flanders / Maggie Si - mpson / Kearney Zzyzwicz / Database / Additional Voices / Lewis / Martin Prince - / Ralph / Rod Flanders / Girl / Boy / Others / Miss Springfield / White-Haired G - irl / Ling Bouvier / Woman #1 / Crowd Members / Krusty's Assistant / Nina Skalka - / Mother / Kearney Jr. 
/ Child Crowd / International Harvester Spuckler / Kent - Brockman's Daughter / Gavin / Baby / Crystal Meth Spuckler / British Woman / Ger - ms / Yay Crowd / Joe Spuckler / Kearney's Son / Auditioning Woman #2 / Auditioni - ng Woman #3 / Dancing Girl #3 / 1st Parrot / Chorus Girl #3 / Hospital Tannoy / - Students / Child #2 / Dr. Hibbert's Younger Son / Girl with Sore Teeth / Child # - 3 / Child #5 / Skinner's secretary / Lassie Actress / Saleswoman / Woman in 'Spa - ce Mutants VIII' / Nun #2 / Shouting Crowd / Elmo / Dino-Son / Homer's cousin's - son / Nelson Moontz / Junior Camper / Baby at Park / Baby #1 / Baby #2 / Snowbal - l II / Kid / Exercise Video Girl / Girls / Various Kids / Homer's Date / Girl at - Kissing Booth / Ex-Wife #2 / Simon Woosterfield / Young Bully / Glee Club Singe - rs / Brittany / Girl on Bus / Wanda / Becky / Prince Ralph of Austria / Nelson's - children / Wendell / Female Scientist / Gravey / Polly / Charlotte / Picard Sim - pson / Kirk Simpson / Vicky / Louise / Ashley Grant Actress / Jamshed / Admiral - Baby / Paperboy / Photography Club Member #2 / Freddy / Boy at Kwik-E-Mart / Act - or / Inanimate Carbon Rod #3 / Nuclear Waste / "Cockamamie's" Salesgirl / Child + role: Bart Simpson thumb: https://m.media-amazon.com/images/M/MV5BMjZmYTQ1YjItZTUzYi00Nzk1LTlmNzEtMmRmMjM4 NjZmOTcyXkEyXkFqcGc@.jpg order: 2 imageHasChanged: false - - id: + - id: nm0733427 name: Harry Shearer - role: - Ned Flanders / Principal Skinner / Kent Brockman / Lenny / Mr. Burns / Dr. Hibbe - rt / Lenny Leonard / Waylon Smithers / Reverend Lovejoy / Otto / Montgomery Burn - s / Scratchy / Jasper / Smithers / Rainier Wolfcastle / Eddie / Kang / Otto Mann - / Rev. Lovejoy / Mr. Largo / Judge Snyder / Announcer / Dewey Largo / TV Announ - cer / God / Principal Seymour Skinner / Skinner / Seymour Skinner / Jasper Beard - ly / Dr. Julius Hibbert / Hibbert / Herman / Marty / Dr. 
Marvin Monroe / Legs / - Radio Announcer / Bill / Reverand Lovejoy / Judge / Tom Brokaw / Additional Voic - es / Bill Clinton / McBain / Louie / Gary / Dave Shutton / Narrator / Jasper Bea - rdley / Sanjay / Charles Montgomery Burns / Others / Guard / Principal Dondeling - er / Ronald Reagan / Baseball Announcer / George H.W. Bush / Herman Hermann / Je - rry / Dr. J. Loren Pryor / Director / Man / Commercial Announcer / Richard Nixon - / Adolf Hitler / Evil Laugh / J. Loren Pryor / Al Gore / Lou / Guard #2 / Sport - scaster / George H. W. Bush / Bob Dole / Doctor / First Man / Johnny Carson / Ne - dward Flanders, Sr. / Jebediah Springfield / Cesar / Concert Announcer / Pilot / - Stadium Announcer / Clifford Burns / Clancy Bouvier / Rabbi Rudenstein / Otto M - an / Learned Pervert / Satan / Dutch Policeman / European Judge #2 / Mr. Prince - / Movie Announcer / Hitler / Bartender / Announcer #1 / Radio DJ / Krusty Show A - nnouncer / Football Announcer / Teamster / Man on TV / Sanjay Nahasapeemapetilon + role: Ned Flanders thumb: https://m.media-amazon.com/images/M/MV5BMTIxNDg4MzA5OF5BMl5BanBnXkFtZTYwOTM3MDc1 .jpg order: 3 imageHasChanged: false - - id: + - id: nm0810379 name: Yeardley Smith - role: - Lisa Simpson / Cecile Shapiro / Grandma Flanders / Yay Crowd / Rudy Huxtable / C - osine / Shouting Crowd / Woman in Queue / Estonian Dwarf as Lisa / Alive Charact - er #2 / Snow White / Jake Boyman / Zia Simpson / Crowd Members / Inanimate Carbo - n Rod #2 / Nuclear Waste / Surgery Audience / Dil / Francine Rhenquist / Angry C - rowd / Child Crowd / Child at Dentist's / Lisabello / Pahasatira Nahasapeemapeti - lon / Marge's Friend #1 / Maggie Simpson / Anonymous Students / Retirement Party - Singers / Singing Crowd / Sacajawea / Salieri / Lisa Jr. 
/ Driver / Connie Appl - eseed / Becky Thatcher / Gabriel / Pagan Daughter / 'Share my locker' / Eliza Si - mpson / Angelica Button / Shouting Teachers / Singing Choir / Itchy & Scratchy S - how End Credits Singers / Voice Activated Search Machine / Lisa's Guilt / Bessie - / Apple / Genital Snurfs / Kate / Siddmartha / Lisa Cactus / Tracey Ullman Lisa - Simpson / Lisandra / Lisanardo da Vinci / Improv Shakespeare Actress #2 / Yeard - ley Smith / Lisa's Mindsets / Lizzie Albertson / Malibu Kevin / Lisa's Granddaug - hter / Edith + role: Lisa Simpson thumb: https://m.media-amazon.com/images/M/MV5BNzcyNjQ1NTAtMDk4My00NDc1LWIzYjMtYmQ1MGE3 MTdkODZmXkEyXkFqcGc@.jpg order: 4 imageHasChanged: false - - id: + - id: nm0000279 name: Hank Azaria - role: - Moe Szyslak / Chief Wiggum / Carl / Comic Book Guy / Lou / Kirk Van Houten / Sup - erintendent Chalmers / Carl Carlson / Apu Nahasapeemapetilon / Apu / Professor F - rink / Cletus / Snake / Disco Stu / Old Jewish Man / Bumblebee Man / Wiseguy / D - uffman / Dr. Nick Riviera / Raphael / Cletus Spuckler / Drederick Tatum / Prof. 
- Frink / Luigi / Captain McAllister / Luigi Risotto / Snake Jailbird / Sea Captai - n / Johnny Tightlips / Captain Horatio McCallister / Captain McCallister / Julio - / Coach Krupt / Clancy Wiggum / Gary Chalmers / Frink / Waiter / Director / Cle - rk / Moe / Chief Clancy Wiggum / TV Announcer / Additional Voices / Crazy Old Ma - n / The Sea Captain / Auctioneer / Roofi / Ranger / Cab Driver / Announcer / Dou - g / Superintendant Chalmers / Tour Guide / Chalmers / Doctor / Chazz Busby / Moe - Syzlak / Gabbo / Ron Rabinowitz / Bailiff / Nobel Prize Announcer / Van Krusten - / European Judge #1 / Easy Reader / Sprooklyn Bum / Guy Talking To Krusty / Leg - s / Delivery Man / Lifeguard / Santa Claus / Homeless Man / Chef / Man in Crowd - / Pilot / Man / Robot / Jesus / Old Jewish Guy / Gunter / Very Tall Man / Report - er #3 / Sam / Guard / Pyro / Bellhop / Audience Member / Security Guard / Jack / - Lawyer / Captain / Krusty's Assistant / Hippie / Technician / General / Veterin - arian / Superintendent Gary Chalmers / Akira / Construction Worker / Benjamin / - Crowd Members + role: Moe Szyslak thumb: https://m.media-amazon.com/images/M/MV5BMTQ0MzIxMzUwMV5BMl5BanBnXkFtZTcwMjE1NjM1 Mg@@.jpg order: 5 imageHasChanged: false - - id: + - id: nm0370788 name: Pamela Hayden - role: - Milhouse Van Houten / Jimbo Jones / Rod Flanders / Janey Powell / Sarah Wiggum / - Wendell / Additional Voices / Janey / Nurse / Jimbo / Crowd Members / Boy / Mar - tin Prince / Others / Dolph / Lewis / Kid / Richard / Patches / Child Crowd / Mi - lhouse van Houten / Birthday Spuckler / Receptionist / Audience Member / Child # - 1 / Ham / Actress / Lady / Child / Old Woman / Waitress / Woman in Crowd / Jitne - y Spuckler / Angry Crowd / Cheering Crowd / Miss Springfield / Woman in Casino / - Bill Cosby's Daughter / Reporter / Police Dispatcher / Pregnant Woman / Laurie - / Chimp Nurse / Dr. 
Hibbert's Son / Reporter #2 / Japanese Mother / Woman Praisi - ng Barney's Movie / Maude Flanders / E-Mail / Dancing Girl #1 / Fan-Demonium Che - erleader / Woman on Street / Around the World Performer / Chorus Girl #1 / Choru - s Girl #3 / Princess Opal the Psychic / Krusty's Fan / BartChat Girl / Televisio - n Aerobics Instructor / Students / Luanne Van Houten / Escape from Grandma's Hou - se Announcer / Janey Hagstrom / Ethan Foley / Sparkle / Yo-Yo Victim / Internati - onal Operator / Tobias / Dakota / Kid #2 / 'She-Bear' / Email / Jack's Girlfrien - d / Commercial Narrator / Sunday School Teacher / Shouting Crowd / Girl in Viagr - aGaine Commercial / Saleswoman / Bartender / Saultery Stevens / Real Housewife # - 1 / Female Pin Vendor / Granddaughter / Amber / Anya / Girls / Stewardess #1 / W - oman at Pool / 'He's got a gun!' / Various Kids / Ex-Wife #1 / Unimpressed Fan # - 2 / Ruthie / Tobey Hunter / Jennifer / Car Buyer / Child at Science Fair / Campa - ign Worker #1 / Milhouse / 1st Girl in Cafeteria + role: Milhouse Van Houten thumb: https://m.media-amazon.com/images/M/MV5BMTY1NjU1ODI1Ml5BMl5BanBnXkFtZTYwNDE2ODQz .jpg order: 6 imageHasChanged: false - - id: + - id: nm0534134 name: Tress MacNeille - role: - Agnes Skinner / Dolph / Lindsey Naegle / Various / Crazy Cat Lady / Mrs. Muntz / - Brandine / Brandine Spuckler / Shauna Chalmers / Dolph Starbeam / Cookie Kwan / - Bernice Hibbert / Lunchlady Doris / Nurse / Various Kids / Lindsay Naegle / Man - jula / Dubya Spuckler / Shauna / Lunchlady Dora / Dolph Shapiro / Mrs. Glick / L - ewis / Woman / Miss Springfield / Various People / Waitress / Socialite / Mrs. 
V - anderbilt / Jimbo Jones / Old Lady / Cora / Myra / Lindsey Neagle / Boy / Lady / - Booberella / Tour Guide / Plopper / Whitney Spuckler / Crowd Members / Alexa / - Brunella Pommelhorst / Cosine / Librarian / Lindsay Neagle / Additional Voices / - Martha / Maya / Opal / Teacher / Little Boy / Poor Violet / Witch / Brandine De - l Roy / Mother / Kumiko / Virgin Mary / Amazon Alexa / Children / Belle / Woman - on Street / Pigs / Aide / Gov. Mary Bailey / Queen Elizabeth II / Actress / Gyps - y / Medicine Woman / Brenda / Mom / Others / Inga / Old Woman / Clara / Operator - / Wife / Noah's Mother / Kenny / Gino Terwilliger / TV Executive / Producer / L - ucy / Woman #2 / Mrs. Burns / Ms. Albright / Tina / Sandra / Paper Boy / Animal - Shelter Worker / Girl / Terri / Parade Commentator / Cienega / Airport Announcer - / Saleswoman / Baseball Player / Churchgoer / Amy / Receptionist + role: Agnes Skinner thumb: https://m.media-amazon.com/images/M/MV5BMTQxMTQ1NzY5N15BMl5BanBnXkFtZTcwMDY5NDUw OA@@.jpg order: 7 imageHasChanged: false - - id: + - id: nm0249283 name: Chris Edgerly - role: - Additional Voices / Crowd Members / Laughing Crowd / Various Voices / The Detona - tor / Cheering Crowd / Veteran / Audience Members / Crowd / Actor Playing Homer - / Angry Crowd / Tucker Carlson / Lifeguard / Death / Angry Driver / Parrot / Mar - lin / Huff'n'puffalus / Baldwin / Alien #1 / Giant Grasshopper / Drunk / Bar Pat - ron / Customer / Others / Santa's Little Hybrid / Casino Bouncer #2 / Paramedic - / Demon / Train Engine / Man at Social Services / Jury Crowd / Bob Johnson / La - Coffee Nostra Waiter / Water Park Members / Elders / Man of Couple / Imaginative - Talking Disposal Unit / Nuclear Power Plant Audience Members / Comedy Club Crow - d / Boston Football Fan / Ricky Ricardo / America's Media Company Employee / Cha - rlie / Jimmy Jimmy / Yale Dean / Jaime Escalante / Hobbit / William Shakespeare - / Plane People / Fund Bunch bowler 1 / Xylem / Conor / Tunnel-Rat / Leroy 
Jethro - Gibbs / Marv Szyslak / Hamster's Mind / Person with Smoker / Danish Police Offi - cer / Promise Keeper / Coders / Squirrel / Clowns / Trump Voter / Stewie Griffin - / Don / Lucas / Just Desserts Owner / Krusty the Clown Writer #2 / Krusty the C - lown Writer #3 / Springfield Nuclear Power Plant Employees / Film Actor / Massag - er / Soft Man / The Toast Master Guy / The Toast Master Voice Generator / Hockey - Gamer / Audience Member / Calgary Lames Team Member / Josh.0 Host / Miles / Gus - tavo Dudamel / Small Parent / Youth Orchestra Students / Dynamic Bodymaker / Hov - erboard Guy / Suited Employees / Jimmy Kissass / Amphibian Man / Ticket Salesman - / Aquatraz Water Park Announcer / Gorilla Sanctuaries Host #2 / Foam Lenny at F - ox Announcer / Airport Manager / First Thanksgiving Celebrators / David Bowie / - Mr. Prince / Santa Claus / Jim / Librarian + role: Additional Voices thumb: https://m.media-amazon.com/images/M/MV5BNWEzNjVhM2MtYmNlOS00NTk4LWI5ZjYtYWQyMWY2 ZWNjNGQwXkEyXkFqcGc@.jpg order: 8 imageHasChanged: false - - id: + - id: nm0744648 name: Maggie Roswell - role: - Helen Lovejoy / Luann Van Houten / Maude Flanders / Miss Hoover / Elizabeth Hoov - er / Nurse / Others / Various / Librarian / Martha Quimby / Mother / Judge / Fan - #1 / Reporter / Gavin's Mom / Mrs. Winfield / Shary Bobbins / Additional Voices - / Ruth Powers / Gov. Mary Bailey / Emma / Auditioning Woman #1 / Dancing Girl # - 2 / Bowler #3 / Starlet / Bernice Hibbert / Chorus Girl #2 / Quimby's Wife / Far - rah Fawcett-Majors-O'Neal-Varney / Kitty Carlisle / KBBL Boss / Child #3 / Tar P - it Information Speaker / Child #4 / Richard / Janey Powell / Mrs. Spencer / Woma - n at Observatory / Operator / Singing Nun / Female Tour Guide / Jimmy Stewart's - Granddaughter / Mail Lady / Warren / Maude Flanders #1 / Showgirl / Kristin Shep - ard / Woman at Park / Newsreader / Fergie / Mrs. Norton / NASA Scientist #2 / Pe - ggy Bundy / Mrs. 
Phillips / Screaming Woman / Woman at Science Fair / Lady / Tel - emarketer #2 / All is well / Police Photographer / Thelma / Ashley Grant / Prote - sting Woman / Woman on Chat Show / Woman in Gentle Ben Audience #1 / Computer Vo - ice / Marguerita / Female Worker / Power Plant Voice / Inspector #1 / Applicatio - n Reviewer #2 / Answering Machine / Ticket Booth Lady / Distressed Woman / Dorot - hy / Building Owner / Three Stooges Actress / Employee / Guest / Princess Kashmi - r / Fe-Mail-Man / Photography Club Member #1 / Churchgoer / Sunday School Teache - r / Strawberry / Old Lady / Ticket Lady / Child Care Woman #1 / Bort #1's Mother - / Child Care Woman #2 / 'I'd be terribly embarrased if I were that boy's mother - ' / Animator #3 / School Nurse / Queen Elizabeth / Allison Taylor / Future Tour - Guide / Shelbyville Lemonade Kid / Singing Waiters / Architect Simpson / Sales C - oordinator Simpson + role: Helen Lovejoy thumb: https://m.media-amazon.com/images/M/MV5BOGE4ZmUyOTQtY2MyOS00ZTgwLThkMGEtNDlhMWMy MTY1ZDY4XkEyXkFqcGc@.jpg order: 9 imageHasChanged: false - - id: + - id: nm0927293 name: Karl Wiedergott - role: - Additional Voices / Various / Others / Jimmy Carter / Legs / Bill Clinton / Crow - d / Actor / Additional voices / Delivery Man / Diego / Jesse / Security Guard / - Prisoners / Burns' First Cellmate / Two-Hook Handed Man / High School Student / - Billy Carter / Kevin / Junkie / Huckleberry Hound / Burt Ward / Bum #1 / Jetskie - r #3 / Stage Manager / I'm afraid / Uncle Tyrone / Oliver / Cap'n Crunch / Trix - Rabbit / Gay Man / Cookie / Politician in a Think Tank / Candidates / Farmer #2 - / Mexicans / Man / Circuit Circus Employee / Ryan Lief / Lawyer / Telemarketer # - 1 / Pilot #1 / Man in Audience / Guy at the Airport / Pilot #2 / Happy Little El - f #2 / Robot #1 / Hockey Player / Fan #2 / Cecil / Writer #1 / Writer #3 / Ned F - landers / Valet / Citizen Worker / Editor / Partygoer / Kenny Loggins / SNPP Sec - urity Guard / Crane Operator / 
Groundskeeper Seamus / Umpire / Ribhead #4 / Parr - ot / Citizens / Soldier #2 / Psychologist #2 / Snowman / Reveller / Various Indi - an Men / Angelo / Caleb / Rick / Anthro Lecturer / Cook / Cruise Line Man #1 / A - ustralian Barfly / Repo Man #1 / Pick-Up Truck Driver / Hobo / Diner / Missionar - y 2 / Trucker 4 / Bodyguard #2 / Whiner #1 / Gay Man #2 / Gay Man #4 / Man with - Check / Poet Writers / Cecil Hampstead-on-Cecil Cecil / Master of Ceremonies / A - stronaut #1 / Garbage Man / Monster in Commercial / Screaming Man / Christian Ca - rrot Theater Announcer / Bacon Truck Driver / Announcer / Redneck #1 / Bum + role: Additional Voices thumb: order: 10 imageHasChanged: false - - id: + - id: nm0853122 name: Russi Taylor - role: - Martin Prince / Sherri / Terri / Uter / Wendell / Additional Voices / Lewis / Ma - rtin / Various / Others / Quimby's Mistress / Loni Anderson / Students / Mother - / Dr. Hibbert's Daughter / Sidney Swift / Ian / Billy / Chuck / Wendell Borton / - Mother in Baby-So-Fresh Commercial / Ewa / Stenographer / Son / Nun / Phone Voi - ce / Girls / Woman on Parole Board / Janey Powell / Ron Howard's Son / Gummy Sue - / Inanimate Carbon Rod #1 / Nuclear Waste / Mrs. Winfield / Orphan / Mount Holy - oke Sister / Vassar Sister / Adrian / Woman in Club / Various Students / Sisters - / Singing Kid in Commercial / Ronnie Beck / Craig's Girlfriend / Kids / Beezee - / Fish / Shirley Temple / Spirit #6 / Mrs. Prince / Sushi / Üter Zörker / Anonym - ous Children + role: Martin Prince thumb: https://m.media-amazon.com/images/M/MV5BMTk1ODEwNTIxNV5BMl5BanBnXkFtZTcwNjA4ODkx OA@@.jpg order: 11 imageHasChanged: false - - id: + - id: nm0908761 name: Marcia Wallace - role: Edna Krabappel / Ms. 
Melon / Woman in Bar / Crowd / Others / Spirit #1 + role: Edna Krabappel thumb: https://m.media-amazon.com/images/M/MV5BMTM5ODczNTY5M15BMl5BanBnXkFtZTgwNTUwMTk2 MDE@.jpg order: 12 imageHasChanged: false - - id: + - id: nm0724656 name: Kevin Michael Richardson - role: - Dr. Hibbert / Anger Watkins / Security Guard / Narrator / Jay G / Maxwell Flinch - / Mark / Burns' Cellmate / Mall Cop / Lady Gaga Express Conductor / Various Voi - ces / SendEx employee / Party Guest / Retirement Castle orderly / Jamaican Music - ian / Cop with a Secret / FBI Agent / Prison inmate / Chester / Jamaican Krusty - / Albert / Nigerian King / Black Prison Guard #1 / The Patriarch / End Credits S - inger / Deuce / Andre / Bird 3 / Stogie / Pazuzu / Bechdel Test Announcer / Azzl - an / Icewalker / Louis Armstrong / Bleeding Gums Murphy's Nephew / Scott / Envel - ope / Michael Clarke Duncan-type / Cthulhu / Phloem / Bookaccino's employee / Ji - mi Hendrix / Coders / Fats / Tubberware Bowl / Sprawl Mart Employee / Atlanta Fa - lcon Fan / Mr. Monroe / Snow Ballroom Bodyguard / GoodCop DogCop Police Manager - / Earl Woods / Basketball Gamer / Louie Armstrong / Sonny Go-along / Demogorgon - / Demogorgon Flanders / Lewis / JJ Hibbert / Jelly Monster / Gautama Buddha / Po - lice Guard / Boarding School Manager / Tupac Shakur / 'The Ballad of John Homer' - singer / Distributed Ledger / People at Therapy / Black Voodoo / Moby Man / Dav - e Chappelle / Michael / Preacher Mac / Jupiter / Security guard / Puppy Goo-Goo - / Cyclist / Knight / Marvel Fan / Apology Instructor / Loitering Guy / Lead Chur - ch Singer / Black Singer / Crowd Members / Wakkety Yak / The Shipper / Devil / C - hristmas Choir Singer / Parking Guy / Isotope Stadium Manager / Mac Tonight / MI - 5 Agents / Agent / Male Director Fan / Groot / Devil Moe / Co-Pilot / Oliver Har - dy / Audio Spy Manager / Film Student #2 / Jed Hawk / Manager + role: Dr. 
Hibbert thumb: https://m.media-amazon.com/images/M/MV5BMTI0OTg2NDQyOV5BMl5BanBnXkFtZTcwMTAwNjkz MQ@@.jpg order: 13 imageHasChanged: false - - id: + - id: nm0246373 name: Alex Désert - role: - Carl Carlson / Lou / Security Guard / Fausto / Audrey II / FBI Officer #1 / Empl - oyee Praising Dance Moves / Partygoer #2 / Gamer / Hockey Player / Wyatt / Fathe - r / H.O.A.G.I.E. Man / Chunk Mafia / New York Yankees Player / Male Scientist / - Judge + role: Carl Carlson thumb: https://m.media-amazon.com/images/M/MV5BMTY1ODM2NDMzNF5BMl5BanBnXkFtZTcwNzkzMTgy Mg@@.jpg order: 14 imageHasChanged: false - - id: + - id: nm0217221 name: Grey DeLisle - role: - Martin Prince / Terri / Sherri / Terri Mackleberry / Sherri Mackleberry / Leland - Huebner III / Young Woman / Martin Prince's Brother / Martin's Child #2 / Women - 's Bar Customer / Airport Passerby #2 / Airport Passerby #4 / Angry Crowd / Taxi - dermy Teacher / French Fry / Francine / Little Girl in Commercial / Parking Enfo - rcement Leader / Riley / Gloria Prince / Crowd Members / Iris Dalrymple / Malibu - Stacy / Mr. Tumnus / Derren Brown Emplyoee / Üter Zörker / Laney / Sherri and T - erri Mackleberry / Hub's Mother / Golf Commentator / Harper Poppyseed / She-E-O - Journalist / Amber + role: Martin Prince thumb: https://m.media-amazon.com/images/M/MV5BMjg2MTQxOTUyMl5BMl5BanBnXkFtZTcwMTgxNTEx OA@@.jpg order: 15 imageHasChanged: false - - id: + - id: nm0367005 name: Phil Hartman - role: - Troy McClure / Lionel Hutz / Lyle Lanley / Hospital Chairman / Fat Tony / Charlt - on Heston / Heavenly Voice / Barfly / Joey / Godfather / Evan Conover / Tom / Ne - lson's Dad / Football Commentator / Tuesday Night Live Announcer / Stockbroker / - Horst / Commercial Announcer #2 / Man in Radio Commercial / Mobster / Moses / C - able Guy / Mexican wrestling announcer / Congressman / Smooth Jimmy Apollo / 'Lo - ok at that stupid kid!' 
/ Plato / Duff Gardens Commercial Voice-Over / Bill Clin - ton + role: Troy McClure thumb: https://m.media-amazon.com/images/M/MV5BNTEyNjYxNTY3Ml5BMl5BanBnXkFtZTYwNTM1MDE0 .jpg order: 16 imageHasChanged: false - - id: + - id: nm0001505 name: Joe Mantegna - role: Fat Tony / Fit Tony / Gordus Antonius / Fat Tony's Former Father + role: Fat Tony thumb: https://m.media-amazon.com/images/M/MV5BMTYwMzY4NDYwN15BMl5BanBnXkFtZTgwNDM3OTY4 MzE@.jpg order: 17 imageHasChanged: false - - id: + - id: nm0507097 name: Dawnn Lewis - role: - Bernice Hibbert / Female Party Guest / Miata / Opal / Juvenile Court Bailiff / C - o-Pilot / Pat O'Brien's Waitress / Lenora Carter / Carlotta Carlson / Hotshot la - wyer / Cannabis Influencer / Flight Attendant / Texxon Customer / Female Firefig - hter / Daly Night / Etta Pryor / Mrs. McBride / Allergy Doctor / Court Bailiff / - Female Criticizing Workforce / Naima / Grocery Store Clerk / Flight Attendance - #2 / Morher / Karate Kids / Ursula / Party guest / Sarah Wiggum / Pam / Perimeno - pause: The Musical Singer + role: Bernice Hibbert thumb: https://m.media-amazon.com/images/M/MV5BMTYwNTk1NTA3Nl5BMl5BanBnXkFtZTYwNTAyMTM1 .jpg order: 18 imageHasChanged: false - - id: + - id: nm0005606 name: Maurice LaMarche - role: - Orson Welles / Hedonismbot / Eudora Welty / George C. Scott / Erman Millwood / S - tarsky / Hutch / Admiral Crunchy / Recruiter #2 / Cap'n Crunch / Jock #2 / Addit - ional Voices / Gate Guard #1 / Harvest Fest Worker / Mr. 
Burns' Doctor / Command - er McBragg / Billy / Oceanographer / Screaming Cop / City Inspector / Fox announ - cer / Chinese #1 / Chinese #4 / Leprechaun / Charlie Sheen / Chef Naziwa / Marlo - n Brando / Milo / Morbo / Lrrr / Rodney Dangerfield / Clive Meriwether / Neil Si - mon / Vincent Price / Hedonismbot Cosplayer / Jerry Seinfeld / Fred Flintstone P - arody / Wlihelm von Wonthelm + role: Orson Welles thumb: https://m.media-amazon.com/images/M/MV5BY2UwY2MxODMtNmZhZi00NzI0LTg5YzMtY2MzZjg0 YTZmNjczXkEyXkFqcGc@.jpg order: 19 imageHasChanged: false - - ... and >30 more + - ... and >200 more posters: (N=1) - id: originalUrl: https://m.media-amazon.com/images/M/MV5BNTU2OWE0YWYtMjRlMS00NTUwLWJmZWUtODFhNzJiMGJlMzI3XkEyXkFqcGc@.jpg @@ -460,5 +234,5 @@ backdrops: (N=0) banners: (N=0) hasTune: false extraFanarts: (N=0) -status: +status: Continuing dateAdded: <not set or invalid> diff --git a/test/scrapers/imdbtv/testImdbTvShowSearch.cpp b/test/scrapers/imdbtv/testImdbTvShowSearch.cpp index ea99ff7697..e3feaf8aa9 100644 --- a/test/scrapers/imdbtv/testImdbTvShowSearch.cpp +++ b/test/scrapers/imdbtv/testImdbTvShowSearch.cpp @@ -16,7 +16,7 @@ TEST_CASE("ImdbTv returns valid search results", "[tv][ImdbTv][search]") auto* searchJob = new ImdbTvShowSearchJob(getImdbApi(), config); const auto scraperResults = test::searchTvScraperSync(searchJob).first; - REQUIRE(scraperResults.length() >= 6); + REQUIRE(scraperResults.length() >= 1); CHECK(scraperResults[0].title == "The Simpsons"); CHECK(scraperResults[0].identifier.str() == "tt0096697"); CHECK(scraperResults[0].released == QDate(1989, 1, 1)); // only year is set @@ -24,13 +24,15 @@ TEST_CASE("ImdbTv returns valid search results", "[tv][ImdbTv][search]") SECTION("Search by TV show name in other languages returns correct results") { + // The Suggest API always returns original titles (not localized). + // Locale has no effect on search results — localization happens at detail loading. 
ShowSearchJob::Config config{"Scrubs", Locale("de-DE")}; auto* searchJob = new ImdbTvShowSearchJob(getImdbApi(), config); const auto scraperResults = test::searchTvScraperSync(searchJob).first; - REQUIRE(scraperResults.length() >= 3); // original + 2026 version + REQUIRE(scraperResults.length() >= 1); const int i = scraperResults[0].released.year() != 2026 ? 0 : 1; - CHECK(scraperResults[i].title == "Scrubs: Die Anfänger"); + CHECK(scraperResults[i].title == "Scrubs"); CHECK(scraperResults[i].identifier.str() == "tt0285403"); CHECK(scraperResults[i].released == QDate(2001, 1, 1)); // only year is set } diff --git a/test/scrapers/testImdbMovie.cpp b/test/scrapers/testImdbMovie.cpp index acd6c3c53b..cc1c658b5a 100644 --- a/test/scrapers/testImdbMovie.cpp +++ b/test/scrapers/testImdbMovie.cpp @@ -131,13 +131,14 @@ TEST_CASE("IMDb scrapes correct movie details", "[movie][IMDb][load_data]") SECTION("'load all tags' is false") { + // With the GraphQL API, all keywords are always loaded in a single request. + // The loadAllTags flag has no effect on the number of tags returned. 
auto scrapeJob = makeScrapeJob("tt0111161", false); test::scrapeMovieScraperSync(scrapeJob.get(), false); auto& m = scrapeJob->movie(); const auto tags = m.tags(); REQUIRE(tags.size() >= 2); - REQUIRE(tags.size() <= 20); CHECK_THAT(tags, Contains("escape from prison")); } } diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index b9da6c28d6..10242a1049 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -17,7 +17,6 @@ target_sources( movie/testMovieFileSearcher.cpp renamer/testPlaceholderParser.cpp renamer/testRenamer.cpp - scrapers/testImdbTvEpisodeParser.cpp scrapers/custom_movie_scraper/StubMovieScraper.cpp scrapers/custom_movie_scraper/testCustomMovieScraper.cpp scrapers/testMovieMerger.cpp diff --git a/test/unit/scrapers/testImdbTvEpisodeParser.cpp b/test/unit/scrapers/testImdbTvEpisodeParser.cpp deleted file mode 100644 index fb2bf87a16..0000000000 --- a/test/unit/scrapers/testImdbTvEpisodeParser.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include "test/test_helpers.h" - -#include "data/tv_show/TvShowEpisode.h" -#include "scrapers/tv_show/imdb/ImdbTvEpisodeParser.h" - -using namespace mediaelch::scraper; - -TEST_CASE("ImdbTvEpisodeParser extracts an episode id from season page", "[episode][ImdbTv][parse_data]") -{ - // Taken from https://www.imdb.com/title/tt0096697/episodes?season=4 on 2023-11-04 - QString episodeEntryHtml = - R"raw("value":"Unknown"}],"episodes":{"items":[{"id":"tt0701142","type":"tvEpisode","season":"4","episode":"1","titleText":"Kamp Krusty",)raw" - R"raw("releaseDate":{"month":4,"day":14,"year":1994,"__typename":"ReleaseDate"},"releaseYear":1994,"image":{"url":"https://m.media-amazon.com/)raw" - R"raw("images/M/MV5BNmYwNGU0MjctYzYzYS00NGE2LWIzMGEtMWVhYjgyMmU2YmE1XkEyXkFqcGc@._V1_.jpg","maxHeight":1280,"maxWidth":853,"caption":"Nancy )raw" - R"raw("Cartwright and Dan Castellaneta in Die Simpsons (1989)"},"plot":"Bart and Lisa attend \u0026quot;Kamp Krusty\u0026quot; but it is nothing like )raw" - R"raw("they 
thought it would be; Homer\u0026#39;s hair grows back and he loses weight while the kids are away.","aggregateRating":8.5,"voteCount":4745,)raw" - R"raw("canRate":true,"contributionUrl":"https://contribute.imdb.com/image/tt0701142/add?bus=imdb\u0026return_url=https%3A%2F%2Fwww.imdb.com%2Fclose_me\u0026site=web"},)raw" - R"raw("{"id":"tt0701048","type":"tvEpisode","season":"4","episode":"2","titleText":"A Streetcar Named Marge","releaseDate":{"month":2,"day":16,"year":1993,)raw" - R"raw(""__typename":"ReleaseDate"},"releaseYear":1993,"image":{"url":"https://m.media-amazon.com/images/M/MV5BODgyNjgyYmEtNmE2Zi00ZGFkLWE3MWMtNTE4NjhhMzMwZjA1)raw" - R"raw("XkEyXkFqcGc@._V1_.jpg","maxHeight":576,"maxWidth":768,"caption":"Julie Kavner in Die)raw"; - - ImdbId expectedEpisodeId("tt0701048"); - TvShowEpisode episode; - episode.setSeason(SeasonNumber(4)); - episode.setEpisode(EpisodeNumber(2)); - - ImdbTvEpisodeParser::parseIdFromSeason(episode, episodeEntryHtml); - CHECK(episode.imdbId() == expectedEpisodeId); -}