diff --git a/README.md b/README.md index 380ae56..22d1a4b 100644 --- a/README.md +++ b/README.md @@ -388,4 +388,4 @@ android { jvmTarget = "1.8" } } -``` \ No newline at end of file +``` diff --git a/pom.xml b/pom.xml index c5eb374..820cd35 100644 --- a/pom.xml +++ b/pom.xml @@ -4,9 +4,9 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - com.github.ikiulian + com.github.kiulian java-youtube-downloader - 3.2.3 + 3.2.4 Java youtube video downloader Java parser for retrieving youtube video meta info @@ -64,7 +64,13 @@ 1.2.83 - + + + org.apache.commons + commons-text + 1.7 + + - \ No newline at end of file + diff --git a/src/main/java/com/github/kiulian/downloader/extractor/Extractor.java b/src/main/java/com/github/kiulian/downloader/extractor/Extractor.java index 456a3d9..67412cf 100644 --- a/src/main/java/com/github/kiulian/downloader/extractor/Extractor.java +++ b/src/main/java/com/github/kiulian/downloader/extractor/Extractor.java @@ -15,6 +15,8 @@ public interface Extractor { JSONObject extractPlayerConfigFromHtml(String html) throws YoutubeException; List extractSubtitlesLanguagesFromXml(String xml) throws YoutubeException; + + String extractSubtitleUrlFromHtml(String html, String videoId) throws YoutubeException; String extractJsUrlFromConfig(JSONObject config, String videoId) throws YoutubeException; diff --git a/src/main/java/com/github/kiulian/downloader/extractor/ExtractorImpl.java b/src/main/java/com/github/kiulian/downloader/extractor/ExtractorImpl.java index 4effbd3..f742dc3 100644 --- a/src/main/java/com/github/kiulian/downloader/extractor/ExtractorImpl.java +++ b/src/main/java/com/github/kiulian/downloader/extractor/ExtractorImpl.java @@ -9,6 +9,7 @@ import com.github.kiulian.downloader.downloader.Downloader; import com.github.kiulian.downloader.downloader.request.RequestWebpage; import com.github.kiulian.downloader.downloader.response.Response; +import 
org.apache.commons.text.StringEscapeUtils; import java.util.*; import java.util.regex.Matcher; @@ -27,7 +28,7 @@ public class ExtractorImpl implements Extractor { Pattern.compile("ytInitialData = (\\{.*?\\});") ); - private static final Pattern SUBTITLES_LANG_CODE_PATTERN = Pattern.compile("lang_code=\"(.{2,3})\""); + private static final Pattern SUBTITLES_LANG_CODE_PATTERN = Pattern.compile("lang=([^&]{2,3})"); /* '&' is excluded so a two-letter code is not captured together with the '&' that follows it */ private static final Pattern TEXT_NUMBER_REGEX = Pattern.compile("[0-9]+[0-9, ']*"); private static final Pattern ASSETS_JS_REGEX = Pattern.compile("\"assets\":.+?\"js\":\\s*\"([^\"]+)\""); private static final Pattern EMB_JS_REGEX = Pattern.compile("\"jsUrl\":\\s*\"([^\"]+)\""); @@ -101,6 +102,30 @@ public List extractSubtitlesLanguagesFromXml(String xml) throws YoutubeE return languages; } + /** + * Finds the first timedtext (subtitle) URL for the given video in the page HTML; the match runs up to the next single or double quote. + * @param html page source to search + * @param videoId video id expected in the URL's v parameter; matched literally + * @return the matched URL with Java-style escape sequences unescaped + * @throws YoutubeException if no timedtext URL for the video is present in the HTML + */ + @Override + public String extractSubtitleUrlFromHtml(String html, String videoId) throws YoutubeException { + /* quote the id so any regex metacharacters in it are matched literally */ + String pattern = "https://www\\.youtube\\.com/api/timedtext\\?v=" + Pattern.quote(videoId) + "[^\"']+"; + Pattern regex = Pattern.compile(pattern); + Matcher matcher = regex.matcher(html); + + if (!matcher.find()) { + throw new YoutubeException.BadPageException("Could not find any subtitle url in the html"); + } + + String escapeUrl = matcher.group(0); + String url = StringEscapeUtils.unescapeJava(escapeUrl); + + return url; + } + @Override public String extractJsUrlFromConfig(JSONObject config, String videoId) throws YoutubeException { String js = null; diff --git a/src/main/java/com/github/kiulian/downloader/parser/ParserImpl.java b/src/main/java/com/github/kiulian/downloader/parser/ParserImpl.java index 33de12c..13567be 100644 --- a/src/main/java/com/github/kiulian/downloader/parser/ParserImpl.java +++ b/src/main/java/com/github/kiulian/downloader/parser/ParserImpl.java @@ -46,7 +46,8 @@ public ParserImpl(Config config, Downloader downloader, Extractor extractor, Cip public Response parseVideo(RequestVideoInfo request) { if (request.isAsync()) { ExecutorService executorService = 
config.getExecutorService(); - Future result = executorService.submit(() -> parseVideo(request.getVideoId(), request.getCallback())); + Future result = executorService + .submit(() -> parseVideo(request.getVideoId(), request.getCallback())); return ResponseImpl.fromFuture(result); } try { @@ -59,7 +60,8 @@ public Response parseVideo(RequestVideoInfo request) { private VideoInfo parseVideo(String videoId, YoutubeCallback callback) throws YoutubeException { // try to spoof android - // workaround for issue https://github.com/sealedtx/java-youtube-downloader/issues/97 + // workaround for issue + // https://github.com/sealedtx/java-youtube-downloader/issues/97 VideoInfo videoInfo = parseVideoAndroid(videoId, callback); if (videoInfo == null) { videoInfo = parseVideoWeb(videoId, callback); @@ -73,8 +75,7 @@ private VideoInfo parseVideo(String videoId, YoutubeCallback callback private VideoInfo parseVideoAndroid(String videoId, YoutubeCallback callback) throws YoutubeException { String url = "https://youtubei.googleapis.com/youtubei/v1/player?key=" + ANDROID_APIKEY; - String body = - "{" + + String body = "{" + " \"videoId\": \"" + videoId + "\"," + " \"context\": {" + " \"client\": {" + @@ -129,7 +130,8 @@ private VideoInfo parseVideoWeb(String videoId, YoutubeCallback callb Response response = downloader.downloadWebpage(new RequestWebpage(htmlUrl)); if (!response.ok()) { - YoutubeException e = new YoutubeException.DownloadException(String.format("Could not load url: %s, exception: %s", htmlUrl, response.error().getMessage())); + YoutubeException e = new YoutubeException.DownloadException( + String.format("Could not load url: %s, exception: %s", htmlUrl, response.error().getMessage())); if (callback != null) { callback.onError(e); } @@ -169,7 +171,8 @@ private VideoInfo parseVideoWeb(String videoId, YoutubeCallback callb } throw e; } - JSONObject context = playerConfig.getJSONObject("args").getJSONObject("player_response").getJSONObject("responseContext"); + JSONObject 
context = playerConfig.getJSONObject("args").getJSONObject("player_response") + .getJSONObject("responseContext"); String clientVersion = extractor.extractClientVersionFromContext(context); List formats; try { @@ -202,7 +205,8 @@ private VideoDetails parseVideoDetails(String videoId, JSONObject playerResponse return new VideoDetails(videoDetails, liveHLSUrl); } - private List parseFormats(JSONObject playerResponse, String jsUrl, String clientVersion) throws YoutubeException { + private List parseFormats(JSONObject playerResponse, String jsUrl, String clientVersion) + throws YoutubeException { if (!playerResponse.containsKey("streamingData")) { throw new YoutubeException.BadPageException("streamingData not found"); } @@ -223,7 +227,8 @@ private List parseFormats(JSONObject playerResponse, String jsUrl, Strin return formats; } - private void populateFormats(List formats, JSONArray jsonFormats, String jsUrl, boolean isAdaptive, String clientVersion) throws YoutubeException.CipherException { + private void populateFormats(List formats, JSONArray jsonFormats, String jsUrl, boolean isAdaptive, + String clientVersion) throws YoutubeException.CipherException { for (int i = 0; i < jsonFormats.size(); i++) { JSONObject json = jsonFormats.getJSONObject(i); if ("FORMAT_STREAM_TYPE_OTF".equals(json.getString("type"))) @@ -251,7 +256,8 @@ private void populateFormats(List formats, JSONArray jsonFormats, String } } - private Format parseFormat(JSONObject json, String jsUrl, Itag itag, boolean isAdaptive, String clientVersion) throws YoutubeException { + private Format parseFormat(JSONObject json, String jsUrl, Itag itag, boolean isAdaptive, String clientVersion) + throws YoutubeException { if (json.containsKey("signatureCipher")) { JSONObject jsonCipher = new JSONObject(); String[] cipherData = json.getString("signatureCipher").replace("\\u0026", "&").split("&"); @@ -270,7 +276,8 @@ private Format parseFormat(JSONObject json, String jsUrl, Itag itag, boolean isA } if 
(urlWithSig.contains("signature") - || (!jsonCipher.containsKey("s") && (urlWithSig.contains("&sig=") || urlWithSig.contains("&lsig=")))) { + || (!jsonCipher.containsKey("s") + && (urlWithSig.contains("&sig=") || urlWithSig.contains("&lsig=")))) { // do nothing, this is pre-signed videos with signature } else if (jsUrl != null) { String s = jsonCipher.getString("s"); @@ -334,7 +341,8 @@ private List parseCaptions(JSONObject playerResponse) { public Response parsePlaylist(RequestPlaylistInfo request) { if (request.isAsync()) { ExecutorService executorService = config.getExecutorService(); - Future result = executorService.submit(() -> parsePlaylist(request.getPlaylistId(), request.getCallback())); + Future result = executorService + .submit(() -> parsePlaylist(request.getPlaylistId(), request.getCallback())); return ResponseImpl.fromFuture(result); } try { @@ -346,12 +354,14 @@ public Response parsePlaylist(RequestPlaylistInfo request) { } - private PlaylistInfo parsePlaylist(String playlistId, YoutubeCallback callback) throws YoutubeException { + private PlaylistInfo parsePlaylist(String playlistId, YoutubeCallback callback) + throws YoutubeException { String htmlUrl = "https://www.youtube.com/playlist?list=" + playlistId; Response response = downloader.downloadWebpage(new RequestWebpage(htmlUrl)); if (!response.ok()) { - YoutubeException e = new YoutubeException.DownloadException(String.format("Could not load url: %s, exception: %s", htmlUrl, response.error().getMessage())); + YoutubeException e = new YoutubeException.DownloadException( + String.format("Could not load url: %s, exception: %s", htmlUrl, response.error().getMessage())); if (callback != null) { callback.onError(e); } @@ -391,7 +401,8 @@ private PlaylistDetails parsePlaylistDetails(String playlistId, JSONObject initi String title = initialData.getJSONObject("metadata") .getJSONObject("playlistMetadataRenderer") .getString("title"); - JSONArray sideBarItems = 
initialData.getJSONObject("sidebar").getJSONObject("playlistSidebarRenderer").getJSONArray("items"); + JSONArray sideBarItems = initialData.getJSONObject("sidebar").getJSONObject("playlistSidebarRenderer") + .getJSONArray("items"); String author = null; try { // try to retrieve author, some playlists may have no author @@ -408,13 +419,15 @@ private PlaylistDetails parsePlaylistDetails(String playlistId, JSONObject initi JSONArray stats = sideBarItems.getJSONObject(0) .getJSONObject("playlistSidebarPrimaryInfoRenderer") .getJSONArray("stats"); - int videoCount = extractor.extractIntegerFromText(stats.getJSONObject(0).getJSONArray("runs").getJSONObject(0).getString("text")); + int videoCount = extractor + .extractIntegerFromText(stats.getJSONObject(0).getJSONArray("runs").getJSONObject(0).getString("text")); long viewCount = extractor.extractLongFromText(stats.getJSONObject(1).getString("simpleText")); return new PlaylistDetails(playlistId, title, author, videoCount, viewCount); } - private List parsePlaylistVideos(JSONObject initialData, int videoCount) throws YoutubeException { + private List parsePlaylistVideos(JSONObject initialData, int videoCount) + throws YoutubeException { JSONObject content; try { @@ -445,7 +458,8 @@ private List parsePlaylistVideos(JSONObject initialData, i return videos; } - private void populatePlaylist(JSONObject content, List videos, String clientVersion) throws YoutubeException { + private void populatePlaylist(JSONObject content, List videos, String clientVersion) + throws YoutubeException { JSONArray contents; if (content.containsKey("contents")) { // parse first items (up to 100) contents = content.getJSONArray("contents"); @@ -479,7 +493,8 @@ private void populatePlaylist(JSONObject content, List vid } } - private void loadPlaylistContinuation(String continuation, String ctp, List videos, String clientVersion) throws YoutubeException { + private void loadPlaylistContinuation(String continuation, String ctp, List videos, + String 
clientVersion) throws YoutubeException { JSONObject content; String url = "https://www.youtube.com/youtubei/v1/browse?key=" + ANDROID_APIKEY; @@ -499,7 +514,8 @@ private void loadPlaylistContinuation(String continuation, String ctp, List response = downloader.downloadWebpage(request); if (!response.ok()) { - throw new YoutubeException.DownloadException(String.format("Could not load url: %s, exception: %s", url, response.error().getMessage())); + throw new YoutubeException.DownloadException( + String.format("Could not load url: %s, exception: %s", url, response.error().getMessage())); } String html = response.data(); @@ -527,7 +543,8 @@ private void loadPlaylistContinuation(String continuation, String ctp, List parseChannelsUploads(RequestChannelUploads request) { if (request.isAsync()) { ExecutorService executorService = config.getExecutorService(); - Future result = executorService.submit(() -> parseChannelsUploads(request.getChannelId(), request.getCallback())); + Future result = executorService + .submit(() -> parseChannelsUploads(request.getChannelId(), request.getCallback())); return ResponseImpl.fromFuture(result); } try { @@ -538,7 +555,8 @@ public Response parseChannelsUploads(RequestChannelUploads request } } - private PlaylistInfo parseChannelsUploads(String channelId, YoutubeCallback callback) throws YoutubeException { + private PlaylistInfo parseChannelsUploads(String channelId, YoutubeCallback callback) + throws YoutubeException { String playlistId = null; if (channelId.length() == 24 && channelId.startsWith("UC")) { // channel id pattern playlistId = "UU" + channelId.substring(2); // replace "UC" with "UU" @@ -547,7 +565,8 @@ private PlaylistInfo parseChannelsUploads(String channelId, YoutubeCallback response = downloader.downloadWebpage(new RequestWebpage(channelLink)); if (!response.ok()) { - YoutubeException e = new YoutubeException.DownloadException(String.format("Could not load url: %s, exception: %s", channelLink, 
response.error().getMessage())); + YoutubeException e = new YoutubeException.DownloadException(String + .format("Could not load url: %s, exception: %s", channelLink, response.error().getMessage())); if (callback != null) { callback.onError(e); } @@ -579,7 +598,8 @@ private PlaylistInfo parseChannelsUploads(String channelId, YoutubeCallback> parseSubtitlesInfo(RequestSubtitlesInfo request) { if (request.isAsync()) { ExecutorService executorService = config.getExecutorService(); - Future> result = executorService.submit(() -> parseSubtitlesInfo(request.getVideoId(), request.getCallback())); + Future> result = executorService + .submit(() -> parseSubtitlesInfo(request.getVideoId(), request.getCallback())); return ResponseImpl.fromFuture(result); } try { @@ -590,21 +610,27 @@ public Response> parseSubtitlesInfo(RequestSubtitlesInfo req } } - private List parseSubtitlesInfo(String videoId, YoutubeCallback> callback) throws YoutubeException { - String xmlUrl = "https://video.google.com/timedtext?hl=en&type=list&v=" + videoId; - + private List parseSubtitlesInfo(String videoId, YoutubeCallback> callback) + throws YoutubeException { + String xmlUrl = "https://www.youtube.com/watch?v=" + videoId; + Response response = downloader.downloadWebpage(new RequestWebpage(xmlUrl)); if (!response.ok()) { - YoutubeException e = new YoutubeException.DownloadException(String.format("Could not load url: %s, exception: %s", xmlUrl, response.error().getMessage())); + YoutubeException e = new YoutubeException.DownloadException( + String.format("Could not load url: %s, exception: %s", xmlUrl, response.error().getMessage())); if (callback != null) { callback.onError(e); } throw e; } - String xml = response.data(); + String html = response.data(); List languages; + + String url; try { - languages = extractor.extractSubtitlesLanguagesFromXml(xml); + url = extractor.extractSubtitleUrlFromHtml(html, videoId); + languages = extractor.extractSubtitlesLanguagesFromXml(url); + } catch 
(YoutubeException e) { if (callback != null) { callback.onError(e); @@ -614,11 +640,8 @@ private List parseSubtitlesInfo(String videoId, YoutubeCallback subtitlesInfo = new ArrayList<>(); for (String language : languages) { - String url = String.format("https://www.youtube.com/api/timedtext?lang=%s&v=%s", - language, videoId); subtitlesInfo.add(new SubtitlesInfo(url, language, false)); } - return subtitlesInfo; } @@ -626,7 +649,8 @@ private List parseSubtitlesInfo(String videoId, YoutubeCallback parseSearchResult(RequestSearchResult request) { if (request.isAsync()) { ExecutorService executorService = config.getExecutorService(); - Future result = executorService.submit(() -> parseSearchResult(request.query(), request.encodeParameters(), request.getCallback())); + Future result = executorService.submit( + () -> parseSearchResult(request.query(), request.encodeParameters(), request.getCallback())); return ResponseImpl.fromFuture(result); } try { @@ -641,7 +665,8 @@ public Response parseSearchResult(RequestSearchResult request) { public Response parseSearchContinuation(RequestSearchContinuation request) { if (request.isAsync()) { ExecutorService executorService = config.getExecutorService(); - Future result = executorService.submit(() -> parseSearchContinuation(request.continuation(), request.getCallback())); + Future result = executorService + .submit(() -> parseSearchContinuation(request.continuation(), request.getCallback())); return ResponseImpl.fromFuture(result); } try { @@ -656,7 +681,8 @@ public Response parseSearchContinuation(RequestSearchContinuation public Response parseSearcheable(RequestSearchable request) { if (request.isAsync()) { ExecutorService executorService = config.getExecutorService(); - Future result = executorService.submit(() -> parseSearchable(request.searchPath(), request.getCallback())); + Future result = executorService + .submit(() -> parseSearchable(request.searchPath(), request.getCallback())); return ResponseImpl.fromFuture(result); 
} try { @@ -667,7 +693,8 @@ public Response parseSearcheable(RequestSearchable request) { } } - private SearchResult parseSearchResult(String query, String parameters, YoutubeCallback callback) throws YoutubeException { + private SearchResult parseSearchResult(String query, String parameters, YoutubeCallback callback) + throws YoutubeException { String searchQuery; try { searchQuery = URLEncoder.encode(query, "UTF-8"); @@ -689,7 +716,8 @@ private SearchResult parseSearchResult(String query, String parameters, YoutubeC } } - private SearchResult parseSearchable(String searchPath, YoutubeCallback callback) throws YoutubeException { + private SearchResult parseSearchable(String searchPath, YoutubeCallback callback) + throws YoutubeException { String url = "https://www.youtube.com" + searchPath; try { return parseHtmlSearchResult(url); @@ -704,11 +732,12 @@ private SearchResult parseSearchable(String searchPath, YoutubeCallback response = downloader.downloadWebpage(new RequestWebpage(url)); if (!response.ok()) { - throw new YoutubeException.DownloadException(String.format("Could not load url: %s, exception: %s", url, response.error().getMessage())); + throw new YoutubeException.DownloadException( + String.format("Could not load url: %s, exception: %s", url, response.error().getMessage())); } String html = response.data(); - + JSONObject initialData = extractor.extractInitialDataFromHtml(html); JSONArray rootContents; try { @@ -720,14 +749,15 @@ private SearchResult parseHtmlSearchResult(String url) throws YoutubeException { } catch (NullPointerException e) { throw new YoutubeException.BadPageException("Search result root contents not found"); } - + long estimatedCount = extractor.extractLongFromText(initialData.getString("estimatedResults")); String clientVersion = extractor.extractClientVersionFromContext(initialData.getJSONObject("responseContext")); SearchContinuation continuation = getSearchContinuation(rootContents, clientVersion); return 
parseSearchResult(estimatedCount, rootContents, continuation); } - private SearchResult parseSearchContinuation(SearchContinuation continuation, YoutubeCallback callback) throws YoutubeException { + private SearchResult parseSearchContinuation(SearchContinuation continuation, + YoutubeCallback callback) throws YoutubeException { String url = "https://www.youtube.com/youtubei/v1/search?key=" + ANDROID_APIKEY + "&prettyPrint=false"; JSONObject body = new JSONObject() @@ -746,7 +776,8 @@ private SearchResult parseSearchContinuation(SearchContinuation continuation, Yo Response response = downloader.downloadWebpage(request); if (!response.ok()) { - YoutubeException e = new YoutubeException.DownloadException(String.format("Could not load url: %s, exception: %s", url, response.error().getMessage())); + YoutubeException e = new YoutubeException.DownloadException( + String.format("Could not load url: %s, exception: %s", url, response.error().getMessage())); if (callback != null) { callback.onError(e); } @@ -771,7 +802,7 @@ private SearchResult parseSearchContinuation(SearchContinuation continuation, Yo } catch (Exception e) { throw new YoutubeException.BadPageException("Could not parse search continuation json"); } - + long estimatedResults = extractor.extractLongFromText(jsonResponse.getString("estimatedResults")); SearchContinuation nextContinuation = getSearchContinuation(rootContents, continuation.clientVersion()); return parseSearchResult(estimatedResults, rootContents, nextContinuation); @@ -791,7 +822,8 @@ private SearchContinuation getSearchContinuation(JSONArray rootContents, String return null; } - private SearchResult parseSearchResult(long estimatedResults, JSONArray rootContents, SearchContinuation continuation) throws BadPageException { + private SearchResult parseSearchResult(long estimatedResults, JSONArray rootContents, + SearchContinuation continuation) throws BadPageException { JSONArray contents; try { @@ -826,26 +858,26 @@ private static 
SearchResultElement parseSearchResultElement(JSONObject jsonItem) String rendererKey = jsonItem.keySet().iterator().next(); JSONObject jsonRenderer = jsonItem.getJSONObject(rendererKey); switch (rendererKey) { - case "videoRenderer": - return new SearchResultVideoDetails(jsonRenderer, false); - case "movieRenderer": - return new SearchResultVideoDetails(jsonRenderer, true); - case "playlistRenderer": - return new SearchResultPlaylistDetails(jsonRenderer); - case "channelRenderer": - return new SearchResultChannelDetails(jsonRenderer); - case "shelfRenderer": - return new SearchResultShelf(jsonRenderer); - case "showingResultsForRenderer": - return new QueryAutoCorrection(jsonRenderer); - case "didYouMeanRenderer": - return new QuerySuggestion(jsonRenderer); - case "horizontalCardListRenderer": - return new QueryRefinementList(jsonRenderer); - default: - System.out.println("Unknown search result element type " + rendererKey); - System.out.println(jsonItem); - return null; + case "videoRenderer": + return new SearchResultVideoDetails(jsonRenderer, false); + case "movieRenderer": + return new SearchResultVideoDetails(jsonRenderer, true); + case "playlistRenderer": + return new SearchResultPlaylistDetails(jsonRenderer); + case "channelRenderer": + return new SearchResultChannelDetails(jsonRenderer); + case "shelfRenderer": + return new SearchResultShelf(jsonRenderer); + case "showingResultsForRenderer": + return new QueryAutoCorrection(jsonRenderer); + case "didYouMeanRenderer": + return new QuerySuggestion(jsonRenderer); + case "horizontalCardListRenderer": + return new QueryRefinementList(jsonRenderer); + default: + System.out.println("Unknown search result element type " + rendererKey); + System.out.println(jsonItem); + return null; } } }