Skip to content
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -388,4 +388,4 @@ android {
jvmTarget = "1.8"
}
}
```
```
14 changes: 10 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.github.ikiulian</groupId>
<groupId>com.github.kiulian</groupId>
<artifactId>java-youtube-downloader</artifactId>
<version>3.2.3</version>
<version>3.2.4</version>

<name>Java youtube video downloader</name>
<description>Java parser for retrieving youtube video meta info</description>
Expand Down Expand Up @@ -64,7 +64,13 @@
<version>1.2.83</version>
</dependency>

</dependencies>
<!-- text -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you implement StringEscapeUtils.unescapeJava() inside this project to avoid an extra dependency?

<version>1.7</version>
</dependency>

</dependencies>

</project>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ public interface Extractor {
JSONObject extractPlayerConfigFromHtml(String html) throws YoutubeException;

List<String> extractSubtitlesLanguagesFromXml(String xml) throws YoutubeException;

String extractSubtitleUrlFromHtml(String html, String videoId) throws YoutubeException;

String extractJsUrlFromConfig(JSONObject config, String videoId) throws YoutubeException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.github.kiulian.downloader.downloader.Downloader;
import com.github.kiulian.downloader.downloader.request.RequestWebpage;
import com.github.kiulian.downloader.downloader.response.Response;
import org.apache.commons.text.StringEscapeUtils;

import java.util.*;
import java.util.regex.Matcher;
Expand All @@ -27,7 +28,7 @@ public class ExtractorImpl implements Extractor {
Pattern.compile("ytInitialData = (\\{.*?\\});")
);

private static final Pattern SUBTITLES_LANG_CODE_PATTERN = Pattern.compile("lang_code=\"(.{2,3})\"");
private static final Pattern SUBTITLES_LANG_CODE_PATTERN = Pattern.compile("lang=(.{2,3})");
private static final Pattern TEXT_NUMBER_REGEX = Pattern.compile("[0-9]+[0-9, ']*");
private static final Pattern ASSETS_JS_REGEX = Pattern.compile("\"assets\":.+?\"js\":\\s*\"([^\"]+)\"");
private static final Pattern EMB_JS_REGEX = Pattern.compile("\"jsUrl\":\\s*\"([^\"]+)\"");
Expand Down Expand Up @@ -101,6 +102,22 @@ public List<String> extractSubtitlesLanguagesFromXml(String xml) throws YoutubeE
return languages;
}

/**
 * Finds the first timed-text (subtitle) url for the given video in the page html.
 *
 * <p>The match starts at {@code https://www.youtube.com/api/timedtext?v=<videoId>} and extends
 * up to, but not including, the next single or double quote. Java-style escape sequences in the
 * matched text are then resolved with {@link StringEscapeUtils#unescapeJava(String)}.
 *
 * @param html    page source to search; must not be null
 * @param videoId id of the video whose subtitle url is wanted; matched literally, must not be null
 * @return the unescaped subtitle url
 * @throws YoutubeException if the html contains no timed-text url for {@code videoId}
 */
@Override
public String extractSubtitleUrlFromHtml(String html, String videoId) throws YoutubeException {
    // Quote the id so that any regex metacharacter in it is matched literally
    // instead of silently changing the meaning of the pattern.
    Pattern regex = Pattern.compile(
            "https://www\\.youtube\\.com/api/timedtext\\?v=" + Pattern.quote(videoId) + "[^\"']+");
    Matcher matcher = regex.matcher(html);

    if (!matcher.find()) {
        throw new YoutubeException.BadPageException("Could not find any subtitle url in the html");
    }

    // The url is embedded in the page in escaped form; unescape it before returning.
    return StringEscapeUtils.unescapeJava(matcher.group(0));
}

@Override
public String extractJsUrlFromConfig(JSONObject config, String videoId) throws YoutubeException {
String js = null;
Expand Down
Loading