RyanDFIR · RyanDFIR · Apr 19, 2026 · Apr 19, 2026 · Apr 19, 2026
diff --git a/unfurl/core.py b/unfurl/core.py
@@ -253,6 +253,27 @@ def preceding_domain_contains(self, node, label):
         labels = preceding.split('.')
         return label in labels
 
+    def find_preceding_path(self, node):
+        """Return the path of the URL that a node was parsed out of.
+
+        Climbs the graph via get_predecessor_node() until a node whose
+        data_type is 'url' is reached, then returns the value of that node's
+        first 'url.path' successor. Returns '' when no path is found.
+        """
+        for ancestor in self.get_predecessor_node(node) or ():
+            if ancestor.data_type != 'url':
+                # Not at the URL yet; keep climbing through this ancestor.
+                inherited_path = self.find_preceding_path(ancestor)
+                if inherited_path:
+                    return inherited_path
+                continue
+
+            # Reached the URL: its path, if any, is one of its direct
+            # successors. The search stops here either way.
+            successors = self.get_successor_nodes(ancestor)
+            return next((s.value for s in successors if s.data_type == 'url.path'), '')
+        return ''
+
     def get_id(self):
         new_id = self.next_id
         self.next_id += 1

diff --git a/unfurl/parsers/parse_google.py b/unfurl/parsers/parse_google.py
@@ -365,6 +365,11 @@ def run(unfurl, node):
                 parent_id=node.node_id, incoming_edge_config=google_edge)
 
         elif node.key == 'q':
+            # On /url redirect pages, q is the destination URL, not a search query.
+            # Skip it here; the google site_def adds the redirect hover text instead.
+            if unfurl.find_preceding_path(node) == '/url':
+                return
+
             unfurl.add_to_queue(
                 data_type='google.q', key=None, value=f'Search Query: {node.value}',
                 hover='Terms used in the Google search', parent_id=node.node_id, incoming_edge_config=google_edge)

diff --git a/unfurl/parsers/parse_site_defs.py b/unfurl/parsers/parse_site_defs.py
@@ -154,6 +154,12 @@ def _check_query_rule(unfurl, node, rule, site_def):
     if node.key != rule.get('key'):
         return False
 
+    # Optional path scoping: only apply this rule when the URL path matches.
+    if 'path' in rule:
+        preceding_path = unfurl.find_preceding_path(node)
+        if preceding_path != rule['path']:
+            return False
+
     apply = rule['apply']
 
     # hover_only: just set hover text on the existing node, don't create a child

diff --git a/unfurl/parsers/site_defs/README.md b/unfurl/parsers/site_defs/README.md
@@ -204,6 +204,22 @@ use `hover_only`:
         Content type/rendering flags.
 ```
 
+### Scoping by path
+
+A query parameter can mean different things depending on the URL path. Use `path` to
+restrict a rule to one path; the value must equal the URL's path exactly (e.g. `/url`):
+
+```yaml
+  # q means "redirect target" on /url, but "search query" on /search
+  - key: q
+    path: /url
+    apply:
+      hover_only: true
+      hover: The redirect target URL.
+```
+
+Without `path`, the rule fires on any URL for the domain whose query string contains the matching key.
+
 ### Parsing values as URLs
 
 Set `data_type: url` to have unfurl parse the parameter value as a full URL:
@@ -301,4 +317,5 @@ class TestExample(unittest.TestCase):
 See the existing definitions in this directory for complete examples:
 - `github.yaml` - Path rules, query rules, fragment rules, exclude_sibling
 - `facebook.yaml` - Complex path rules, wildcard excludes, hover_only query rules
+- `google.yaml` - Path-scoped query rules, hover_only for context-dependent parameters
 - `instagram.yaml` - Multiple URL formats for the same content type
diff --git a/unfurl/parsers/site_defs/google.yaml b/unfurl/parsers/site_defs/google.yaml
@@ -0,0 +1,157 @@
+# Google URL parser definition (non-search endpoints)
+# Search-specific parameters (ei, ved, gs_l, aqs, etc.) are handled
+# by parse_google.py. This file covers other Google URL types.
+# Parsed by parse_site_defs.py
+
+name: Google
+domains:
+  - google.com
+edge:
+  color: "#4285F4"
+  title: Google
+  label: "G"
+
+# --- /url redirect pages ---
+# google.com/url?q=DESTINATION&sa=D&ust=TIMESTAMP&usg=SIGNATURE
+query_rules:
+
+  - key: q
+    path: /url
+    apply:
+      hover_only: true
+      hover: >-
+        The <b>redirect target URL</b> in a Google redirect link (/url).
+        Google wraps outbound links through this redirect for
+        click tracking and malware warnings. This is <em>not</em> a
+        search query — it is the destination the user was sent to.
+
+  - key: url
+    path: /url
+    apply:
+      hover_only: true
+      hover: >-
+        The <b>redirect target URL</b> in a Google redirect link (/url).
+        Alternate parameter name for the redirect target
+        (used in some older Google redirect formats).
+
+  - key: sa
+    path: /url
+    apply:
+      hover_only: true
+      hover: >-
+        Google redirect action type. Common values:
+        <b>t</b> = standard redirect (from search results),
+        <b>D</b> = redirect from a Google product (Docs, Hangouts, etc.),
+        <b>U</b> = redirect from a Google Cache page.
+
+  - key: usg
+    path: /url
+    apply:
+      hover_only: true
+      hover: >-
+        Google URL signature. A hash used to verify the redirect
+        link was generated by Google and has not been tampered with.
+
+  - key: ust
+    path: /url
+    apply:
+      hover_only: true
+      hover: >-
+        Google redirect timestamp (microseconds since Unix epoch).
+        Believed to indicate when the redirect link was generated
+        or when the referring page was loaded.
+
+  - key: ved
+    path: /url
+    apply:
+      hover_only: true
+      hover: >-
+        Google tracking parameter encoding the link's position and type
+        on the referring page. Contains a protobuf-encoded structure.
+
+  - key: rct
+    path: /url
+    apply:
+      hover_only: true
+      hover: >-
+        Redirect confirmation type. Typically <b>j</b> (JavaScript redirect).
+
+  - key: esrc
+    path: /url
+    apply:
+      hover_only: true
+      hover: >-
+        Source of the redirect. Typically <b>s</b> (search).
+
+  # --- /imgres (image result pages) ---
+
+  - key: imgurl
+    apply:
+      label: "Image URL: {value}"
+      data_type: url
+      hover: >-
+        The direct URL of the full-size image shown in Google Image results.
+
+  - key: imgrefurl
+    apply:
+      label: "Image Source Page: {value}"
+      data_type: url
+      hover: >-
+        The URL of the web page that contains/hosts the image.
+
+  - key: h
+    path: /imgres
+    apply:
+      label: "Image Height: {value}px"
+      data_type: descriptor
+      hover: >-
+        The height of the original image in pixels.
+
+  - key: w
+    path: /imgres
+    apply:
+      label: "Image Width: {value}px"
+      data_type: descriptor
+      hover: >-
+        The width of the original image in pixels.
+
+  - key: tbnid
+    path: /imgres
+    apply:
+      hover_only: true
+      hover: >-
+        Google Image thumbnail ID. A unique identifier for this image
+        in Google's index. Can be used to correlate the same image
+        across different search sessions.
+
+  - key: docid
+    path: /imgres
+    apply:
+      hover_only: true
+      hover: >-
+        Google Image document ID. Identifies the web page hosting the
+        image in Google's index.
+
+  - key: tbnh
+    path: /imgres
+    apply:
+      label: "Thumbnail Height: {value}px"
+      data_type: descriptor
+      hover: >-
+        The height of the thumbnail image displayed in search results.
+
+  - key: tbnw
+    path: /imgres
+    apply:
+      label: "Thumbnail Width: {value}px"
+      data_type: descriptor
+      hover: >-
+        The width of the thumbnail image displayed in search results.
+
+  - key: iact
+    path: /imgres
+    apply:
+      hover_only: true
+      hover: >-
+        Interaction action type — how the user reached this image result.
+        Common values: <b>rc</b> = right-click, <b>hc</b> = hover-click.
diff --git a/unfurl/tests/unit/test_google.py b/unfurl/tests/unit/test_google.py
@@ -1,4 +1,5 @@
 from unfurl.core import Unfurl
+from urllib.parse import urlparse
 import unittest
 
 
@@ -96,5 +97,48 @@ def test_google_search_with_aqs(self):
         self.assertEqual(len(test.edges), 0)
 
 
+    def test_google_url_redirect(self):
+        """A google.com/url link is treated as a redirect, not a search.
+
+        The q parameter holds the redirect target, so no 'google.q' (Search
+        Query) node may be created for it; q, sa and usg instead carry the
+        explanatory hover text, and the target URL itself is still parsed.
+        """
+
+        redirect_url = (
+            'https://www.google.com/url?q=https://example.org/landing'
+            '&sa=D&ust=1546552999624000&usg=AFQjCNGESR0jI6krt8QOg3NlJ0GS60RxJg')
+
+        unfurled = Unfurl()
+        unfurled.remote_lookups = False
+        unfurled.add_to_queue(data_type='url', key=None, value=redirect_url)
+        unfurled.parse_queue()
+
+        def query_pair(key):
+            # First url.query.pair node carrying the given key.
+            return next(
+                item for item in unfurled.nodes.values()
+                if item.data_type == 'url.query.pair' and item.key == key)
+
+        # q must not be turned into a "Search Query" node
+        search_query_count = sum(
+            1 for item in unfurled.nodes.values() if item.data_type == 'google.q')
+        self.assertEqual(0, search_query_count)
+
+        # q is annotated as the redirect target
+        self.assertIn('redirect target', query_pair('q').hover.lower())
+
+        # the redirect target was unfurled into its own url node
+        destinations = [
+            item for item in unfurled.nodes.values()
+            if item.data_type == 'url'
+            and urlparse(str(item.value)).hostname == 'example.org']
+        self.assertGreaterEqual(len(destinations), 1)
+
+        # sa and usg carry their own explanatory hover text
+        self.assertIn('action type', query_pair('sa').hover.lower())
+        self.assertIn('signature', query_pair('usg').hover.lower())
+
+
 if __name__ == '__main__':
     unittest.main()