mxrch · soxoj · May 17, 2026 · May 18, 2026
diff --git a/gitfive/lib/commits.py b/gitfive/lib/commits.py
@@ -1,3 +1,5 @@
+import json
+
 import trio
 from bs4 import BeautifulSoup
 from alive_progress import alive_bar
@@ -8,11 +10,54 @@
 from gitfive.lib.instruments import TrioAliveProgress
 
 
+def _extract_payload(raw_body: str):
+    """
+    Return the `payload` dict embedded in a GitHub commits page, or None.
+    Every `<script type="application/json">` tag in `raw_body` is scanned; the
+    first one that mentions `commitGroups`, decodes as JSON, and carries a dict
+    under `payload` with a `commitGroups` key wins. Scripts with invalid JSON
+    are skipped. This replaces the `<li class="js-commits-list-item">` scraping;
+    the rest of this module reads `commitGroups[*].commits[*].oid` / `authors`.
+    """
+    body = BeautifulSoup(raw_body, 'html.parser')
+    for s in body.find_all('script', {'type': 'application/json'}):
+        text = s.string or ''  # a tag whose .string is None is treated as empty
+        if 'commitGroups' in text:  # cheap substring filter before decoding
+            try:
+                data = json.loads(text)
+            except json.JSONDecodeError:
+                continue
+            payload = data.get('payload')  # NOTE(review): assumes a JSON object; a top-level list would raise AttributeError
+            if isinstance(payload, dict) and 'commitGroups' in payload:
+                return payload
+    return None
+
+
+def _iter_commits(payload):  # generator: yields every commit entry, group by group, in payload order
+    for group in payload.get('commitGroups', []) or []:  # `or []`: a null value is treated as empty
+        for commit in group.get('commits', []) or []:  # same guard for a group's `commits`
+            yield commit
+
+
+def _target_author(commit):
+    """
+    Return the first `authors` entry of `commit` whose `login` is truthy and not
+    'gitfive_hunter', else None (a missing/null `authors` counts as empty). Per
+    the original note: a Metamon commit lists the local committer (login=None)
+    and the impersonated co-author; no login means GitHub knows no such email.
+    """
+    for author in commit.get('authors', []) or []:
+        login = author.get('login')
+        if login and login != 'gitfive_hunter':  # skip unmatched entries and the local committer name
+            return author
+    return None
+
+
 async def fetch_avatar(runner: GitfiveRunner, email: str, avatar_link: str, username: str,
                         out: Dict[str, str|bool], check_only: bool):
     async with runner.limiters["commits_fetch_avatar"]:
         is_target = (username.lower() == runner.target.username.lower())
-        if check_only:            
+        if check_only:
             if is_target:
                 runner.rc.print(f"[+] [Target's email] 🐱 {email} -> @{username}", style="cyan")
 
@@ -49,42 +94,53 @@ async def fetch_commits(runner: GitfiveRunner, repo_name: str, emails_index: Dic
 
         if req.status_code == 429:
             exit(f'Rate-limit detected, please adjust the CapacityLimiter.\nCurrent CapacityLimiter : {runner.limiters["commits_scrape"]}')
-        body = BeautifulSoup(req.text, 'html.parser')
 
-        commits = body.find_all("li", {"class": "js-commits-list-item"})
-
+        payload = _extract_payload(req.text)
+        if payload is None:
+            return
+
         async with trio.open_nursery() as nursery:
-            for commit in commits:
-                hexsha = commit.find("a", {"class": "js-navigation-open"}).attrs["href"].split("/")[-1]
-                avatar = commit.find("img", {"class": "avatar-user"})
-                if not avatar:
+            for commit in _iter_commits(payload):
+                hexsha = commit.get('oid')
+                if not hexsha or hexsha not in emails_index:
+                    continue
+                target = _target_author(commit)
+                if target is None:
                     continue
 
                 email = emails_index[hexsha]
-                avatar_link = avatar.get("src")
-                username = avatar.get("alt")[1:] # We remove the "@" at the beginning
-                
+                avatar_link = target.get('avatarUrl')
+                username = target.get('login')
+
                 nursery.start_soon(fetch_avatar, runner, email, avatar_link, username, out, check_only)
 
 async def scrape(runner: GitfiveRunner, repo_name: str, emails_index: Dict[str, str], check_only=False):
     out = {}
-    total = 0
-    last_hash_trigger = f"/{runner.creds.username}/{repo_name}/tree/"
-    last_hash = ""
 
-    req = await runner.as_client.get(f"https://github.com/{runner.creds.username}/{repo_name}")
-    body = BeautifulSoup(req.text, 'html.parser')
+    req = await runner.as_client.get(f"https://github.com/{runner.creds.username}/{repo_name}/commits/mirage")
+    if req.status_code != 200:
+        exit(f"Couldn't fetch the commits page (HTTP {req.status_code}).")
 
-    if is_repo_empty(body):
-        exit("Empty repository.")
+    payload = _extract_payload(req.text)
+    if payload is None:
+        body = BeautifulSoup(req.text, 'html.parser')
+        if is_repo_empty(body):
+            exit("Empty repository.")
+        exit("Couldn't parse the commits page payload.")
 
-    if last_hash_trigger in req.text:
-        _, total = await get_commits_count(runner, raw_body=req.text)
-        last_hash = [x for x in body.select('a') if x.text.lower() == "permalink"][0].attrs['href'].split('/')[-1]
-    else:
+    last_hash = (payload.get('currentCommit') or {}).get('oid') \
+        or (payload.get('refInfo') or {}).get('currentOid')
+    if not last_hash:
         exit("Couldn't fetch the last hash.")
 
-    to_request = [0]+list(range(-1, total-1, 35))[1:]
+    _, total = await get_commits_count(runner, raw_body=req.text)
+    if not total:
+        # Fall back to counting whatever the payload already gave us.
+        total = sum(len(g.get('commits', []) or []) for g in payload.get('commitGroups', []) or [])
+    if not total:
+        return out
+
+    to_request = [0] + list(range(-1, total-1, 35))[1:]
 
     with alive_bar(total, receipt=False, enrich_print=False, title="Fetching committers...") as bar:
         instrument = TrioAliveProgress(fetch_commits, 35, bar)
@@ -97,4 +153,4 @@ async def scrape(runner: GitfiveRunner, repo_name: str, emails_index: Dict[str,
 
         trio.lowlevel.remove_instrument(instrument)
 
-    return out
+    return out
diff --git a/gitfive/lib/domain_finder.py b/gitfive/lib/domain_finder.py
@@ -24,11 +24,20 @@ def guess_custom_domain(runner: GitfiveRunner):
     except Exception: # https://github.com/mxrch/GitFive/issues/15
         runner.rc.print("[!] Google Search failed, are you using a VPN/Proxy ?", style="italic")
 
-    # Hunter.io
-    req = httpx.get(f"https://hunter.io/v2/domains-suggestion?query={company}")
-    data = json.loads(req.text)
-    if results := data.get("data", [{}]):
-        hunter = results[0].get("domain")
+    # Hunter.io — the public hunter.io/v2 endpoint now 303-redirects to
+    # api.hunter.io and requires an API key (returns 401). Treat any non-200
+    # or non-JSON response as "no result" instead of crashing the whole run.
+    try:
+        req = httpx.get(
+            f"https://hunter.io/v2/domains-suggestion?query={company}",
+            follow_redirects=True,
+        )
+        if req.status_code == 200:
+            data = req.json()
+            if results := data.get("data", [{}]):
+                hunter = results[0].get("domain")
+    except (httpx.HTTPError, json.JSONDecodeError, ValueError):
+        runner.rc.print("[!] Hunter.io lookup failed.", style="italic")
 
     if hunter and (not google or hunter in google):
         runner.rc.print(f'🔍 [Hunter.io] Found possible domain "{hunter}" for company "{company}"', style="light_green")

diff --git a/gitfive/lib/repos.py b/gitfive/lib/repos.py
@@ -48,11 +48,41 @@ async def fetch_repos_page(runner: GitfiveRunner, page: int, repos: List[Dict[st
             repos.append(details)
 
 
+def _extract_repo_count(body: BeautifulSoup) -> int:
+    """
+    Return the repository count shown on the profile's "repositories" nav tab,
+    or 0 when the tab, its counter span, or any digit in it cannot be found.
+    A `<span class="Counter">` is preferred (its `title`, else its text); the
+    fallback is `<span data-component="counter">` text. Per the original note,
+    the former is the legacy anonymous shell and the latter Primer-React.
+    """
+    tab = body.find("a", {"data-tab-item": "repositories"})
+    if tab is None:
+        return 0
+
+    legacy = tab.find("span", {"class": "Counter"})
+    if legacy is not None:
+        raw = legacy.attrs.get("title") or legacy.get_text()  # empty/missing title falls back to the visible text
+    else:
+        modern = tab.find("span", {"data-component": "counter"})
+        if modern is None:
+            return 0
+        raw = modern.get_text()
+
+    digits = ''.join(ch for ch in raw if ch.isdigit())  # NOTE(review): abbreviated text like "1.2k" would become 12 — confirm GitHub's format
+    return int(digits) if digits else 0
+
+
 async def get_list(runner: GitfiveRunner):
     req = await runner.as_client.get(f"https://github.com/{runner.target.username}?tab=repositories")
 
     body = BeautifulSoup(req.text, 'html.parser')
-    total_repos = int(body.find("a", {"data-tab-item": "repositories"}).find("span", {"class": "Counter"}).attrs["title"])
+    total_repos = _extract_repo_count(body)
+
+    if not total_repos:
+        runner.target.repos = []
+        runner.target.languages_stats = {}
+        return
 
     to_request = range(1, ceil(total_repos/30)+1)
 

diff --git a/gitfive/lib/utils.py b/gitfive/lib/utils.py
@@ -110,13 +110,17 @@ async def get_commits_count(runner: GitfiveRunner, repo_url: str="", raw_body: s
         raw_body = req.text
     body = BeautifulSoup(raw_body, 'html.parser')
     # Slightly modified this line to find the correct <span> containing the commit count
-    commits_icon_el = body.find("a", {"href": re.compile(r'.*/commits/mirage$')})
+    commits_icon_el = body.find("a", {"href": re.compile(r'.*/commits/mirage/?$')})
     if not commits_icon_el:
         return False, 0
     nb_commits_el = commits_icon_el.findNext("span")
     if not nb_commits_el:
         return False, 0
-    nb_commits_str = nb_commits_el.text.split()[0].replace(",", "")
+    nb_commits_text = nb_commits_el.text.strip() or commits_icon_el.get_text(strip=True)
+    parts = nb_commits_text.split()
+    if not parts:
+        return False, 0
+    nb_commits_str = parts[0].replace(",", "")
     if nb_commits_str == "∞":
         return True, 50000 # Temporary limit, because GitHub hasn't liked my 70k commits
     nb_commits = int(nb_commits_str)