From e32a4335bc396a70c0148bac49afe4985ac59ff4 Mon Sep 17 00:00:00 2001
From: justanotherosinter <134204602+justanotherosinter@users.noreply.github.com>
Date: Wed, 24 May 2023 16:27:48 -0400
Subject: [PATCH 1/3] Update cli.py

Added YouTube module
---
 ghunt/cli.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/ghunt/cli.py b/ghunt/cli.py
index c445b2c3..a05587e9 100644
--- a/ghunt/cli.py
+++ b/ghunt/cli.py
@@ -25,6 +25,11 @@ def parse_and_run():
     parser_drive = subparsers.add_parser('drive', help="Get information on a Drive file or folder.")
     parser_drive.add_argument("file_id", help="Example: 1N__vVu4c9fCt4EHxfthUNzVOs_tp8l6tHcMBnpOZv_M")
     parser_drive.add_argument('--json', type=str, help="File to write the JSON output to.")
+
+    ### YouTube module
+    parser_youtube = subparsers.add_parser('youtube', help="Get information on a YouTube channel (doesn't work with channels created after Google removed IDs from the page source, and relies on the page having been archived by Wayback Machine).")
+    parser_youtube.add_argument("channel_url", help="Example: https://www.youtube.com/@YouTube")
+    parser_youtube.add_argument('--json', type=str, help="File to write the JSON output to.")
 
     ### Parsing
     args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])
@@ -44,4 +49,7 @@ def process_args(args: argparse.Namespace):
             trio.run(gaia.hunt, None, args.gaia_id, args.json)
         case "drive":
             from ghunt.modules import drive
-            trio.run(drive.hunt, None, args.file_id, args.json)
\ No newline at end of file
+            trio.run(drive.hunt, None, args.file_id, args.json)
+        case "youtube":
+            from ghunt.modules import youtube
+            trio.run(youtube.hunt, None, args.channel_url, args.json)

From 6c1e07cf9cfab53e7e83bbb0ecb99618c9aea43e Mon Sep 17 00:00:00 2001
From: justanotherosinter <134204602+justanotherosinter@users.noreply.github.com>
Date: Wed, 24 May 2023 16:28:41 -0400
Subject: [PATCH 2/3] Create YouTube.py

Module to allow a YouTube channel to be looked up in Ghunt.
Rough version.
---
 ghunt/modules/YouTube.py | 74 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 ghunt/modules/YouTube.py

diff --git a/ghunt/modules/YouTube.py b/ghunt/modules/YouTube.py
new file mode 100644
index 00000000..93fb3ed1
--- /dev/null
+++ b/ghunt/modules/YouTube.py
@@ -0,0 +1,74 @@
+"""Resolve a YouTube channel to a Gaia ID through the Wayback Machine."""
+
+from ghunt.helpers.utils import get_httpx_client
+from ghunt import globals as gb
+
+import argparse
+import re
+
+import httpx
+import requests
+import trio
+import waybackpy
+
+# User agent handed to waybackpy for its Wayback Machine requests.
+USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0"
+# Seconds to wait on each page download before giving up.
+REQUEST_TIMEOUT = 15
+# Snapshots are looked up near this year; presumably channel pages still linked
+# their Google+ profile back then -- TODO confirm and make configurable.
+ARCHIVE_YEAR = 2019
+
+# "/channel/<id>" URL as it appears in a channel page's source.
+CHANNEL_URL_RE = re.compile(r"https?://(?:www\.)?youtube\.com/channel/[\w-]+")
+# Google+ profile link; its numeric path segment is used as the Gaia ID.
+GAIA_ID_RE = re.compile(r"https?://plus\.google\.com/([0-9]+)")
+
+
+def _fetch_text(url: str) -> str:
+    """Download *url* and return its body, raising on HTTP error statuses."""
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+    return response.text
+
+
+def _nearest_archive_url(url: str) -> str:
+    """Return the URL of the Wayback Machine snapshot of *url* nearest ARCHIVE_YEAR."""
+    return str(waybackpy.Url(url, USER_AGENT).near(year=ARCHIVE_YEAR))
+
+
+async def hunt(as_client: httpx.AsyncClient, channel_url: str, json_file: str=None):
+    """
+    Look up the Google account behind a YouTube channel.
+
+    The channel page is downloaded to find its "/channel/<id>" URL, the
+    archived snapshot of that URL nearest ARCHIVE_YEAR is fetched from the
+    Wayback Machine, and the Google+ profile ID found in it is passed to the
+    gaia module.
+
+    Args:
+        as_client: HTTP client forwarded to gaia.hunt (may be None).
+        channel_url: URL of the channel, e.g. https://www.youtube.com/@YouTube.
+        json_file: File to write the JSON output to, forwarded to gaia.hunt.
+
+    Raises:
+        ValueError: If no channel URL or no Google+ profile link is found.
+        requests.HTTPError: If a page download returns an HTTP error status.
+    """
+    # requests and waybackpy block, so keep them off the trio event loop.
+    channel_page = await trio.to_thread.run_sync(_fetch_text, channel_url)
+    channel_match = CHANNEL_URL_RE.search(channel_page)
+    if channel_match is None:
+        raise ValueError(f"No channel ID URL found in the page at {channel_url}")
+    channel_id_url = channel_match.group(0)
+
+    # TODO: switch to the Memento API for access to more archives?
+    archive_url = await trio.to_thread.run_sync(_nearest_archive_url, channel_id_url)
+    archived_page = await trio.to_thread.run_sync(_fetch_text, archive_url)
+    gaia_match = GAIA_ID_RE.search(archived_page)
+    if gaia_match is None:
+        raise ValueError(f"No Google+ profile link found in the archive {archive_url}")
+
+    # Imported lazily, like the module imports in cli.py.
+    from ghunt.modules import gaia
+    await gaia.hunt(as_client, gaia_match.group(1), json_file)

From 735a9225a6db0ebbb61b2a2c3dce312dafeda9e1 Mon Sep 17 00:00:00 2001
From: justanotherosinter <134204602+justanotherosinter@users.noreply.github.com>
Date: Wed, 24 May 2023 16:30:35 -0400
Subject: [PATCH 3/3] Rename YouTube.py to youtube.py

Fix filename
---
 ghunt/modules/{YouTube.py => youtube.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename ghunt/modules/{YouTube.py => youtube.py} (100%)

diff --git a/ghunt/modules/YouTube.py b/ghunt/modules/youtube.py
similarity index 100%
rename from ghunt/modules/YouTube.py
rename to ghunt/modules/youtube.py