From 71bed084c824c0909614cdc9b00c9056611011fa Mon Sep 17 00:00:00 2001
From: Eclair Ji Li <eclairjili@ajisin.local>
Date: Fri, 29 May 2026 17:20:23 +0800
Subject: [PATCH 1/2] Add SG grocery product-search domain skills (Shopee,
 Lazada, FairPrice)

Field-tested playbooks for searching and extracting frozen ramen/udon SKUs
from the three Singapore grocery sites, captured during a competitor survey:

- shopee/product-search.md: login wall, blocked search API, lazy-rendered
  grid, innerText price-line parser, IPC/timeout recovery ladder
- lazada/product-search.md: price-anchored DOM walk, large-js()-returns-None
  payload cap, PDP spec-grid parser, card-price != RSP, RedMart = Lazada
- fairprice/product-search.md: clean /product/ anchors, richest PDP
  (origin + storage fields), fuzzy-search filtering

Also adds reusable Lazada/Shopee list + PDP extractors to agent_helpers.py.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 agent-workspace/agent_helpers.py              | 141 ++++++++++++-
 .../domain-skills/fairprice/product-search.md | 151 +++++++++++++
 .../domain-skills/lazada/product-search.md    | 199 ++++++++++++++++++
 .../domain-skills/shopee/product-search.md    | 176 ++++++++++++++++
 4 files changed, 664 insertions(+), 3 deletions(-)
 create mode 100644 agent-workspace/domain-skills/fairprice/product-search.md
 create mode 100644 agent-workspace/domain-skills/lazada/product-search.md
 create mode 100644 agent-workspace/domain-skills/shopee/product-search.md

diff --git a/agent-workspace/agent_helpers.py b/agent-workspace/agent_helpers.py
index 2d493c17..414e4a04 100644
--- a/agent-workspace/agent_helpers.py
+++ b/agent-workspace/agent_helpers.py
@@ -1,7 +1,142 @@
 """Agent-editable browser helpers.
 
-Add task-specific browser primitives here. Core helpers from browser_harness.helpers
-load this file when BH_AGENT_WORKSPACE points at this directory, or when this
-repo's default agent-workspace exists.
+Task-specific extractors for the frozen ramen/udon competitor survey.
+Keeping JS here as plain strings avoids shell/heredoc escaping problems.
 """
+import json
 
+
+def _eval(expr):
+    from browser_harness.helpers import js
+    return js(expr)
+
+
+def laz_list(limit=60):
+    """Lazada: price-anchored card extraction. Walk up from each price node
+    to a card container, grab its title. Small payload (filtered in JS)."""
+    expr = r"""
+    (function(){
+      var out=[]; var seen={};
+      var all=document.querySelectorAll('*');
+      for (var i=0;i<all.length;i++){
+        var el=all[i];
+        if (el.children.length) continue;            // leaf nodes only
+        var t=(el.innerText||'').trim();
+        if (!/^\$[\d,]+\.\d{2}$/.test(t)) continue;   // a price leaf
+        // climb to a card that also contains a product title link
+        var card=el; var title='';
+        for (var k=0;k<8 && card;k++){
+          card=card.parentElement;
+          if (!card) break;
+          var a=card.querySelector('a[title], a[href*=".html"]');
+          if (a){
+            title=(a.getAttribute('title')||a.innerText||'').replace(/\s+/g,' ').trim();
+            if (title.length>6) break;
+          }
+        }
+        if (title.length<6) continue;
+        var key=title.slice(0,60);
+        if (seen[key]) continue; seen[key]=1;
+        var link='';
+        var la=card.querySelector('a[href*=".html"]');
+        if (la) link=la.href.split('?')[0];
+        out.push({name:title.slice(0,130), price:t, href:link});
+        if (out.length>=LIMIT) break;
+      }
+      return JSON.stringify(out);
+    })()
+    """.replace("LIMIT", str(limit))
+    r = _eval(expr)
+    return json.loads(r) if r else []
+
+
+def shopee_list():
+    """Return list of {name, price, href} for a Shopee search page."""
+    expr = r"""
+    (function(){
+      var out=[]; var seen={};
+      var items=document.querySelectorAll('li.shopee-search-item-result__item, .shopee-search-item-result__item');
+      if(!items.length){
+        var anchors=document.querySelectorAll('a');
+        for (var i=0;i<anchors.length;i++){
+          var a=anchors[i]; var h=a.href||'';
+          if (!/i\.\d+\.\d+/.test(h)) continue;
+          var key=h.split('?')[0];
+          if (seen[key]) continue; seen[key]=1;
+          var txt=(a.innerText||'').replace(/\s+/g,' ').trim();
+          if (txt.length>4) out.push({name:txt.slice(0,140), price:'', href:key});
+        }
+        return JSON.stringify(out);
+      }
+      for (var i=0;i<items.length;i++){
+        var el=items[i];
+        var a=el.querySelector('a');
+        var h=a?a.href:'';
+        var key=h?h.split('?')[0]:'';
+        var txt=(el.innerText||'').replace(/\s+/g,' ').trim();
+        var m=(el.innerText||'').match(/\$[\d,]+(\.\d+)?/g);
+        out.push({name:txt.slice(0,140), price:m?m.join(' / '):'', href:key});
+      }
+      return JSON.stringify(out);
+    })()
+    """
+    r = _eval(expr)
+    return json.loads(r) if r else []
+
+
+def page_text():
+    return _eval("document.body.innerText") or ""
+
+
+def laz_detail():
+    """Parse a Lazada PDP into a dict of useful fields from page innerText."""
+    txt = page_text()
+    lines = [l.strip() for l in txt.split("\n")]
+    d = {"title": "", "brand": "", "pack_size": "", "origin": "",
+         "ptype": "", "price": "", "soldby": "", "breadcrumb": ""}
+    import re
+    # breadcrumb line contains tab-joined path ending in the title
+    for l in lines:
+        if "Groceries" in l and "\t" not in l and len(l) > 20:
+            pass
+    # title: the heading repeats right after breadcrumb; take the longest
+    # line that looks like a product title
+    def nextval(i):
+        for j in range(i + 1, min(i + 5, len(lines))):
+            if lines[j].strip():
+                return lines[j].strip()
+        return ""
+    for i, l in enumerate(lines):
+        if l.startswith("Brand:"):
+            d["brand"] = l.replace("Brand:", "").split("More ")[0].strip()
+        if l == "Pack Size":
+            d["pack_size"] = nextval(i)
+        if l == "Net Weight" and not d["pack_size"]:
+            d["pack_size"] = nextval(i)
+        if l == "Place of Origin":
+            d["origin"] = nextval(i)
+        if l == "Product Type":
+            d["ptype"] = nextval(i)
+        if l.startswith("Sold by"):
+            d["soldby"] = l.replace("Sold by", "").strip()
+    # price: the $x.xx just before the first "Add to cart" (skip $60 banner)
+    cut = txt.find("Add to cart")
+    seg = txt[:cut] if cut > 0 else txt
+    prices = re.findall(r"\$[\d,]+\.\d{2}", seg)
+    prices = [p for p in prices if p != "$60.00"]
+    if prices:
+        d["price"] = prices[-1]
+    elif re.search(r"\$[\d,]+\.\d{2}", txt):
+        d["price"] = re.search(r"\$[\d,]+\.\d{2}", txt).group(0)
+    # title: last segment of the breadcrumb line (tab-joined)
+    for l in lines:
+        if "\t" in l and ("Groceries" in l or "Frozen" in l):
+            d["title"] = l.split("\t")[-1].strip()
+            d["breadcrumb"] = " > ".join(l.split("\t"))
+            break
+    if not d["title"]:
+        cands = [l for l in lines[:30] if len(l) > 15 and any(c.isalpha() for c in l)
+                 and "FEEDBACK" not in l and "delivery" not in l.lower()]
+        if cands:
+            d["title"] = max(cands, key=len)
+    return d
diff --git a/agent-workspace/domain-skills/fairprice/product-search.md b/agent-workspace/domain-skills/fairprice/product-search.md
new file mode 100644
index 00000000..0271bd5e
--- /dev/null
+++ b/agent-workspace/domain-skills/fairprice/product-search.md
@@ -0,0 +1,151 @@
+# FairPrice (fairprice.com.sg) — Product Search & Data Extraction
+
+Field-tested against www.fairprice.com.sg on 2026-05-29 via a focused recon pass
+(a few frozen ramen/udon searches + one PDP) during a competitor price survey. Coverage of
+edge cases is lighter than the Shopee/Lazada skills, but the core search → extract → PDP
+flow below is confirmed working. FairPrice SG is the **easiest of the three SG grocery
+sites**: no login wall, no bot/traffic wall, no age popup, and a light page (~50 anchors,
+~1–2k chars of text).
+
+## Navigation
+
+### Search URL (no login needed)
+```python
+goto_url("https://www.fairprice.com.sg/search?query=frozen%20ramen")  # spaces = %20 (or +)
+wait(6)   # light page; results render in ~5-6s. wait_for_load() also tends to work here.
+```
+- Title becomes `Results For <query> | FairPrice`; body shows `Results for "<query>"`.
+- No delivery address/postal code is required to see prices (the "Enter your address" /
+  "Fees may apply" prompts do **not** block results).
+- **Search is fuzzy** — a "frozen udon" query returned a shrimp-fritter ("Bakwan Udang").
+  Always filter results by name relevance.
+
+### Product detail URL
+Pattern: `/product/<slug>-<numericCode>`, e.g.
+`/product/shimadaya-frozen-shoyu-ramen-noodles-with-soup-stock-frozen-477g-90016285`
+(some house-brand items are slug-only, e.g. `/product/chicken-collagen-ramen---frozen`).
+```python
+goto_url("https://www.fairprice.com.sg/product/shimadaya-frozen-shoyu-ramen-noodles-with-soup-stock-frozen-477g-90016285")
+wait(5)
+```
+
+## Search results extraction
+
+Unlike Lazada, FairPrice result cards use **clean `/product/` anchors**, so anchor on those
+and climb to the card that contains the price. Each card's innerText is conveniently rich:
+`$<price> <name> <packSize> By <date> Add to cart` (or `... Out of stock`).
+
+```python
+import json
+expr = r"""
+(function(){
+  var out=[], seen={};
+  var as=document.querySelectorAll('a[href*="/product/"]');
+  for(var i=0;i<as.length;i++){
+    var a=as[i], h=a.getAttribute('href')||'';
+    if(seen[h]) continue; seen[h]=1;
+    var card=a;
+    for(var k=0;k<6 && card;k++){ card=card.parentElement;
+      if(card && /\$[\d,]+\.\d{2}/.test(card.innerText||'')) break; }
+    var t=card?(card.innerText||'').replace(/\s+/g,' ').trim():'';
+    var p=(t.match(/\$[\d,]+\.\d{2}/g)||[]);
+    out.push({href:h, price:p[0]||'', card:t.slice(0,110)});
+    if(out.length>=40) break;
+  }
+  return JSON.stringify(out);
+})()
+"""
+items = json.loads(js(expr) or "[]")
+```
+
+Field notes:
+- **`price`**: take the **first** `$x.xx` in the card text — that's the selling price.
+  FairPrice has no "$60 free-delivery" banner (the Lazada trap), so first-match is safe.
+- **Card text begins with the price**, then the name. To get a clean name, strip the leading
+  price token: `name = re.sub(r'^\$[\d,]+\.\d{2}\s*', '', card)` then cut at ` By ` /
+  ` Add to cart` / ` Out of stock`.
+- **Pack size is inline** in the card (e.g. `477 G`, `150 G`, `2 x 150 G`) — already captured
+  in the card text, no PDP needed for size.
+- Result sets are **small and low-noise** (8 for "frozen ramen", 4 for "frozen udon") — far
+  cleaner than Shopee. No pagination/"load more" surfaced for these queries; treat the grid
+  as a single set.
+
+## Product detail page (PDP) — richest of the three sites
+
+PDP innerText carries labelled fields, each **value on the line after its label** (except
+`Brand:` which is inline):
+
+```
+Home  Frozen  Frozen Food  Ready Meals          <- breadcrumb
+$8.80                                            <- price (first $ on page)
+Shimadaya Frozen Shoyu Ramen Noodles with Soup Stock   <- name
+477 G                                            <- pack size
+Brand:Shimadaya
+Sold by:
+Kirei Japanese Food Supply                       <- marketplace seller
+KEY INFORMATION
+<description: flavour, portions, etc.>
+COUNTRY/PLACE OF ORIGIN
+Japan
+STORAGE INFORMATION
+Keep frozen at or below -15 degree C ...         <- confirms frozen vs chilled
+```
+
+Parser:
+```python
+import re
+txt = js("document.body.innerText") or ""
+lines=[l.strip() for l in txt.split("\n")]
+def after(label):
+    for i,l in enumerate(lines):
+        if l==label:
+            for j in range(i+1, min(i+4,len(lines))):
+                if lines[j].strip(): return lines[j].strip()
+    return ""
+d={}
+m=re.search(r"\$[\d,]+\.\d{2}", txt); d["price"]=m.group(0) if m else ""
+for l in lines:
+    if l.startswith("Brand:"): d["brand"]=l.split("Brand:")[1].strip()
+d["origin"]   = after("COUNTRY/PLACE OF ORIGIN")
+d["storage"]  = after("STORAGE INFORMATION")          # tells frozen vs chilled
+d["desc"]     = after("KEY INFORMATION")
+d["soldby"]   = after("Sold by:")
+# name/packsize: the two non-empty lines right after the price line
+```
+
+Fields you can reliably pull: **price, name, pack size, Brand, Sold-by seller,
+COUNTRY/PLACE OF ORIGIN, STORAGE INFORMATION (frozen/chilled), KEY INFORMATION
+(description/flavour/portions)**. This is the only one of the three sites that exposes a
+storage line — useful to confirm a product is genuinely frozen.
+
+## Coverage note
+
+FairPrice carries SKUs **not** on Lazada (e.g. "Shimadaya Reito Tenobe Masari Frozen Udon",
+"Kirei Premium Shoyu Tonkotsu Ramen") and overlaps on others (Little Totler, Shimadaya
+Shoyu, Kirei, Daisho). Use it as a **complementary source** alongside Lazada/RedMart —
+don't assume Lazada coverage is a superset.
+
+## Tab hygiene
+
+Open once, reuse with `goto_url`, close when done — don't `new_tab` per search.
+```python
+tid = new_tab("https://www.fairprice.com.sg/search?query=frozen%20ramen"); wait(6)
+# reuse with goto_url for each subsequent query / PDP ...
+close_tab(tid)
+# end-of-task sweep:
+for t in list_tabs(include_chrome=False):
+    if "fairprice.com.sg" in (t.get("url") or ""): close_tab(t["targetId"])
+```
+
+## Gotchas (field-tested)
+
+- **No login / bot / age wall** — easiest of the three SG sites; anonymous search works.
+- **Light, fast page** — `wait(5-6)` is plenty; no full-page-screenshot or scroll-hang issues like Shopee.
+- **`/product/` anchors are clean** — anchor on them directly (no price-walk hack needed, unlike Lazada).
+- **Card text starts with the price** — strip the leading `$x.xx` to get the name; pack size is inline in the card.
+- **First `$` on a PDP IS the price** — no delivery-banner trap (unlike Lazada's `$60.00`).
+- **PDP labels → value on the next line**; `Brand:` is inline. `STORAGE INFORMATION` confirms frozen vs chilled.
+- **Fuzzy search** — irrelevant items can appear (e.g. "Udang" matched "udon"); filter by name.
+- **Marketplace sellers** ("Sold by: ...") — many Japanese frozen items are 3rd-party marketplace listings, not FairPrice house stock.
+- **Unique catalogue** — has SKUs absent from Lazada; treat as complementary, not redundant.
+- **Reuse one tab, close when done.**
diff --git a/agent-workspace/domain-skills/lazada/product-search.md b/agent-workspace/domain-skills/lazada/product-search.md
new file mode 100644
index 00000000..ed5dc24a
--- /dev/null
+++ b/agent-workspace/domain-skills/lazada/product-search.md
@@ -0,0 +1,199 @@
+# Lazada (lazada.sg) & RedMart — Product Search & Data Extraction
+
+Field-tested against www.lazada.sg on 2026-05-29 (frozen ramen/udon competitor survey).
+Unlike Shopee, Lazada SG search works **without login** and is far more reliable. The two
+things that wasted calls were (1) large `js()` payloads silently returning `None`, and
+(2) product links not following one clean pattern — both solved below.
+
+RedMart lives inside Lazada: `redmart.lazada.sg` redirects to `www.lazada.sg`, RedMart
+items appear in normal Lazada search (with a RedMart badge/tab), and clicking one lands on
+a `www.lazada.sg/products/...html` PDP with RedMart delivery UI. Treat RedMart as Lazada.
+
+## Navigation
+
+### Search URL (primary entry point — no login needed)
+```python
+goto_url("https://www.lazada.sg/catalog/?q=frozen+ramen")  # spaces = +
+wait(4)
+```
+- `/catalog/?q=X` frequently 302s to `/tag/X/?q=X&catalog_redirect_tag=true`. That's normal —
+  results still load. Confirm with `page_info()["url"]`.
+- Product detail URL pattern: `https://www.lazada.sg/products/pdp-i<itemId>.html`
+  (sometimes `...-i<itemId>-s<skuId>.html`).
+
+### Load the whole result grid before extracting
+Lazada lazy-loads cards on scroll. `window.scrollTo` works here (unlike Shopee), so step
+through a few offsets:
+```python
+for y in (400, 1200, 2200, 3200):
+    js("window.scrollTo(0, %d)" % y); wait(0.6)
+```
+
+## Two traps that cost real time
+
+### Trap 1 — large `js()` results come back as `None` (IPC size cap)
+`js("JSON.stringify(Array.from(document.querySelectorAll('a')).map(...))")` over **all**
+anchors (1000+ on a Lazada page) returns `None`/empty: the response exceeds the IPC
+`recv(1<<16)` (64 KiB) framing and is dropped. **Always filter and cap inside the JS** so the
+returned payload is small (≤ ~60 short items). Small `js()` reads (a `.length`, one field)
+work fine; it's only the big serialisations that fail.
+
+### Trap 2 — product links have no single clean selector
+On search/tag pages, `/products/` anchors are mostly the 2 sponsored ads; the real result
+cards use varying markup, so href-filtering misses them. **Anchor on the price instead.**
+Find leaf nodes whose text is exactly a price, climb to the enclosing card, read its title
+link. This reliably yielded ~39 products per page:
+
+```python
+import json
+expr = r"""
+(function(){
+  var out=[], seen={};
+  var all=document.querySelectorAll('*');
+  for (var i=0;i<all.length;i++){
+    var el=all[i];
+    if (el.children.length) continue;             // leaf nodes only
+    var t=(el.innerText||'').trim();
+    if (!/^\$[\d,]+\.\d{2}$/.test(t)) continue;    // a price leaf
+    var card=el, title='', link='';
+    for (var k=0;k<8 && card;k++){
+      card=card.parentElement; if(!card) break;
+      var a=card.querySelector('a[title], a[href*=".html"]');
+      if (a){ title=(a.getAttribute('title')||a.innerText||'').replace(/\s+/g,' ').trim();
+              if (title.length>6) break; }
+    }
+    if (title.length<6) continue;
+    var key=title.slice(0,60); if(seen[key]) continue; seen[key]=1;
+    var la=card.querySelector('a[href*=".html"]'); if(la) link=la.href.split('?')[0];
+    out.push({name:title.slice(0,130), price:t, href:link});
+    if (out.length>=60) break;                     // cap payload — see Trap 1
+  }
+  return JSON.stringify(out);
+})()
+"""
+items = json.loads(js(expr) or "[]")
+```
+
+Note the JS escapes (`\$`, `\d`, `\s`) survive a `cat <<'PY'` heredoc because the delimiter
+is quoted. If you instead keep the extractor in `agent-workspace/agent_helpers.py`, you
+avoid all heredoc-escaping risk entirely (recommended for repeated runs).
+
+## Single-invocation rule (avoids stale-tab `None`)
+
+Each `browser-harness <<'PY'` call re-attaches to a tab; a *separate* call can attach to a
+different/stale session and return `None` or junk. **Navigate + wait + scroll + extract in
+ONE invocation.** If a read looks wrong, `ensure_real_tab()` then re-do the nav+extract in
+a single call. Confirm you're live with `js("document.querySelectorAll('a').length")`
+(~1000 on a loaded results page; ~8 means not rendered yet).
+
+## Product detail page (PDP) extraction
+
+Grocery/RedMart PDPs expose a clean spec grid in `document.body.innerText`. Scroll deep
+(~3000px) first or the grid won't be rendered:
+
+```python
+goto_url("https://www.lazada.sg/products/pdp-i303976586.html"); wait(4)
+for y in (600,1400,2200,3000): js("window.scrollTo(0,%d)"%y); wait(0.4)
+txt = js("document.body.innerText") or ""
+```
+
+The text layout (note the **blank line between label and value**):
+```
+Groceries	Frozen	Convenience Foods	Ready-to-Eat Meals	<Title>     <- tab-joined breadcrumb
+<Title>
+Brand:<Brand>More Frozen from <Brand>
+Pack Size
+
+477 g
+Place of Origin
+
+Japan
+Product Type
+
+Frozen
+$8.80                <- real price (a $60.00 "free delivery" banner appears earlier in the page text, not shown here)
+Add to cart
+...
+Sold by <seller>
+```
+
+Parser:
+```python
+import re
+lines=[l.strip() for l in txt.split("\n")]
+def nextval(i):                       # value sits 1-4 lines below its label (blanks between)
+    for j in range(i+1, min(i+5,len(lines))):
+        if lines[j].strip(): return lines[j].strip()
+    return ""
+d={"title":"","brand":"","pack_size":"","origin":"","ptype":"","price":"","soldby":""}
+for i,l in enumerate(lines):
+    if l.startswith("Brand:"):       d["brand"]=l.replace("Brand:","").split("More ")[0].strip()
+    if l=="Pack Size":               d["pack_size"]=nextval(i)
+    if l=="Net Weight" and not d["pack_size"]: d["pack_size"]=nextval(i)
+    if l=="Place of Origin":         d["origin"]=nextval(i)
+    if l=="Product Type":            d["ptype"]=nextval(i)
+    if l.startswith("Sold by"):      d["soldby"]=l.replace("Sold by","").strip()
+# price: the $x.xx just before "Add to cart", skipping the $60.00 free-delivery banner
+cut=txt.find("Add to cart"); seg=txt[:cut] if cut>0 else txt
+ps=[p for p in re.findall(r"\$[\d,]+\.\d{2}", seg) if p!="$60.00"]
+if ps: d["price"]=ps[-1]
+# title: last segment of the tab-joined breadcrumb line
+for l in lines:
+    if "\t" in l and ("Groceries" in l or "Frozen" in l):
+        d["title"]=l.split("\t")[-1].strip(); break
+```
+
+Fields available: Brand, Pack Size (or Net Weight), Place of Origin, Product Type
+(e.g. "Frozen"), price, Sold-by seller, breadcrumb category. RedMart-fulfilled listings
+(sellers like "Kirei Japanese Food Supply", "Soon Seng Huat") reliably populate the grid;
+so do most marketplace grocery sellers once you scroll far enough.
+
+## Price gotcha: card price ≠ RSP
+
+Search-result cards may show a **voucher/promo** price (e.g. $6.80) while the PDP shows the
+true RSP (e.g. $7.90). For an accurate survey, open the PDP and read its price for RSP, and
+record the card price as the promo. The "Platinum save $X / Coin save $X" lines in card
+text are loyalty deltas, not the price — ignore them.
+
+## Age-verification popup
+
+Some queries trigger a "you must be at least 21 years" modal that blocks the grid. Dismiss
+by button text (don't click pixels — layout shifts):
+```python
+js("Array.from(document.querySelectorAll('button')).find(b=>b.innerText==='Over 21')?.click()")
+wait(1)
+```
+
+## Tab hygiene
+
+Reuse one tab; don't `new_tab` per search. Close when done.
+```python
+tid = new_tab("https://www.lazada.sg/catalog/?q=frozen+ramen"); wait(4)
+# ... reuse with goto_url for each subsequent query ...
+close_tab(tid)
+# end-of-task sweep:
+for t in list_tabs(include_chrome=False):
+    if "lazada.sg" in t["url"]: close_tab(t["targetId"])
+```
+
+## Exhaustiveness tip
+
+One keyword misses SKUs. Run several phrasings and union by product name, e.g. for ramen:
+`"frozen ramen"`, `"frozen ramen noodle soup"`, `"japanese frozen ramen"`; for udon:
+`"frozen udon"`, `"frozen udon noodle soup"`, `"sanuki udon frozen"`. Lazada/RedMart had
+far deeper frozen Japanese-noodle coverage than Shopee — it's the primary source for this
+category.
+
+## Gotchas (field-tested)
+
+- **No login wall** — anonymous catalog search works (the opposite of Shopee).
+- **Big `js()` serialisations return `None`** — filter + cap (≤~60 items) inside the JS.
+- **Price-anchored extraction** beats href/selector hunting on result pages.
+- **One invocation per nav+extract** — separate calls re-attach and may return `None`.
+- **PDP spec values are 1-4 lines below the label** (blank lines between) — scan forward.
+- **First `$` on a PDP is the `$60.00` delivery banner** — take the price before "Add to cart".
+- **Card price may be a voucher price**, not RSP — confirm RSP on the PDP.
+- **`/catalog/` 302s to `/tag/`** — expected, results still load.
+- **Age popup** on some queries — click the "Over 21" button by text.
+- **RedMart = Lazada** — same search and PDP; `redmart.lazada.sg` redirects to `www.lazada.sg`.
+- **Reuse one tab, close when done** — avoid tab pile-up.
diff --git a/agent-workspace/domain-skills/shopee/product-search.md b/agent-workspace/domain-skills/shopee/product-search.md
new file mode 100644
index 00000000..ec36b701
--- /dev/null
+++ b/agent-workspace/domain-skills/shopee/product-search.md
@@ -0,0 +1,176 @@
+# Shopee (shopee.sg) — Product Search & Data Extraction
+
+Field-tested against shopee.sg on 2026-05-29 using a **logged-in** Chrome session
+(competitor price survey for frozen ramen/udon). Shopee SG is heavy, lazy-rendered,
+and bot-defended. The patterns below are what actually survived; the naive Playwright
+reflexes (wait_for_load, full screenshot, JS scroll, search API) all fail here.
+
+## Prerequisites — you MUST be logged in
+
+Anonymous sessions are blocked. Navigating to any `shopee.sg/...` URL while logged out
+redirects to a traffic-verification wall:
+
+```
+https://shopee.sg/verify/traffic/error?...&is_logged_in=false&...&type=4
+title: "Shopee Singapore | Cheaper, Faster On Shopee"
+body : "Page Unavailable — Looks like you're not logged in yet."
+```
+
+There is no programmatic bypass. If you land on `/verify/traffic/error`, stop and ask the
+user to log into Shopee in their Chrome, then retry. Detect it early:
+
+```python
+def shopee_blocked():
+    return "/verify/traffic/error" in page_info()["url"]
+```
+
+The search API is also blocked — do **not** waste a call on it:
+```python
+# http_get("https://shopee.sg/api/v4/search/search_items?keyword=...") -> HTTP 403
+```
+All extraction must go through the rendered DOM in the logged-in browser.
+
+## Tab hygiene (do this — Shopee tabs pile up fast)
+
+Open Shopee **once** with `new_tab`, then reuse that one tab with `goto_url` for every
+subsequent search. Close it when the task is done. Do NOT call `new_tab` per search — that
+is how you end up with dozens of orphan tabs.
+
+```python
+# first search of the session
+tid = new_tab("https://shopee.sg/search?keyword=frozen%20ramen")
+wait(8)
+
+# every later search: reuse the SAME tab
+goto_url("https://shopee.sg/search?keyword=frozen%20udon")
+wait(8)
+
+# when finished with Shopee entirely
+close_tab(tid)            # or close_tab() to close the current tab
+```
+
+If you spawned strays during exploration, sweep them at the end:
+```python
+for t in list_tabs(include_chrome=False):
+    if "shopee.sg" in t["url"]:
+        close_tab(t["targetId"])
+```
+
+## Navigation
+
+### Search URL (only reliable entry point)
+```python
+goto_url("https://shopee.sg/search?keyword=frozen%20ramen%20soup")  # spaces = %20 (or +)
+wait(8)   # NOT wait_for_load() — see Gotchas
+```
+- Pagination: append `&page=N` (0-indexed, so `&page=0` is the first page). The UI counter is 1-based (`1/N`).
+- Sort: the UI exposes Relevance / Latest / Top Sales / Price tabs; relevance (default) is fine
+  for surveys. Sorting via URL params is unreliable — click the tab if you must.
+
+### Product detail page
+Product URLs carry the id pattern `...-i.<shopId>.<itemId>`:
+```python
+goto_url("https://shopee.sg/product-name-i.123456.7890123")
+wait(7)
+```
+
+## The three things that WILL bite you
+
+### 1. `wait_for_load()` times out — use a fixed `wait()`
+Shopee's main thread stays busy long after `readyState=complete`, so the
+`js("document.readyState")` poll inside `wait_for_load()` raises a CDP timeout.
+Use a hard `wait(7)`–`wait(9)` after navigation instead.
+
+### 2. Full-page screenshots time out — use `full=False`
+The results page is very tall; `capture_screenshot()` (full page) exceeds the IPC
+deadline. Always pass `full=False` for a viewport-only grab:
+```python
+img = capture_screenshot(full=False)
+```
+
+### 3. The result grid lazy-renders, and `window.scrollTo` can hang
+After navigation the SEARCH FILTER sidebar appears but the product grid is blank until
+the viewport scrolls. `js("window.scrollTo(0, N)")` itself sometimes times out because
+the main thread is blocked. The reliable nudge is a CDP **keyboard End** event, then wait:
+
+```python
+cdp("Input.dispatchKeyEvent", type="keyDown", key="End", windowsVirtualKeyCode=35)
+cdp("Input.dispatchKeyEvent", type="keyUp",   key="End", windowsVirtualKeyCode=35)
+wait(3)
+```
+
+A blank grid shows `len(js("document.body.innerText"))` ≈ 150–200. After the grid
+renders it jumps to ~3000+. If a `js()`/screenshot call raises `TimeoutError`, the page
+was mid-render — just retry the read in a **separate** call (the page keeps loading in
+the background; do not restart the daemon for this).
+
+## Search results extraction — parse innerText, not selectors
+
+The DOM cards (`li.shopee-search-item-result__item`) render lazily, so `querySelectorAll` often
+matches 0 cards right after load. The robust method is parsing `document.body.innerText`, where
+each result is laid out as **separate lines** with the `$` on its own line:
+
+```
+<product name>
+$
+12.90
+-15%            (optional discount line)
+4.9             (rating, optional)
+2k+ sold        (optional)
+2 Days          (delivery, optional)
+SG
+Find Similar
+```
+
+Parser (find the `$` line, take the line before as name, the line after as price):
+```python
+import re
+txt = js("document.body.innerText") or ""
+lines = [l.strip() for l in txt.split("\n") if l.strip()]
+out, seen = [], set()
+for i, l in enumerate(lines):
+    if l == "$" and i+1 < len(lines) and re.match(r"^[\d,]+\.\d{2}$", lines[i+1]):
+        name = lines[i-1]
+        if len(name) > 8 and "sold" not in name and name not in seen:
+            seen.add(name)
+            out.append(("$" + lines[i+1], name))
+```
+
+Results start just after the line `Search result for '<keyword>'` and the `1/N` counter;
+everything before that (trending keywords, filter labels) is chrome — the `len>8` and
+`"sold" not in name` guards drop most of it.
+
+Selector fallback (only works once cards are in view — scroll first):
+```python
+js("document.querySelectorAll('li.shopee-search-item-result__item').length")
+```
+
+## Relevance filtering (Shopee search is noisy)
+
+A query like "frozen ramen" returns mostly **instant cup/packet noodles, soup-base
+concentrates, and restaurant catering bundles** — not frozen retail packs. Filter by
+keyword in the product name and sanity-check the price band. In this survey, genuine
+frozen retail ramen/udon was scarce on Shopee; the real catalogue depth was on
+Lazada/RedMart. Don't assume an empty/odd result set is a bug — Shopee genuinely may not
+stock the SKU.
+
+## Daemon / IPC recovery
+
+Heavy Shopee pages occasionally throw `TimeoutError: timed out` from the IPC layer.
+Escalate gently:
+1. Retry the same read in a new `browser-harness` call (cheapest; usually enough).
+2. `ensure_real_tab()` then retry, if the session looks detached.
+3. `restart_daemon()` only as a last resort — on Chrome 144+ it re-triggers the
+   "Allow remote debugging?" popup, which the user must click again. Avoid mid-task.
+
+## Gotchas (field-tested)
+
+- **Logged-out = hard wall** at `/verify/traffic/error` (`type=4`). No bypass; ask the user to log in.
+- **Search API → 403.** Browser DOM only.
+- **`wait_for_load()` times out.** Use `wait(7-9)`.
+- **`capture_screenshot()` (full page) times out.** Use `capture_screenshot(full=False)`.
+- **`window.scrollTo` can hang.** Nudge with a CDP `End` key event, then `wait(3)`.
+- **Prices are split text nodes.** In innerText the `$` and the number are on separate lines — parse accordingly; `querySelector('[class*=price]')` is brittle.
+- **Cards render lazily.** querySelector count is 0 until the grid scrolls into view.
+- **Reuse one tab.** `goto_url` for repeat searches; `close_tab()` when done. Never `new_tab` per query.
+- **Noisy results.** Instant noodles / soup bases / catering dominate "frozen" queries — filter by name.

From 9994b3563147cc8de942c706b0fcbe57b9299f2c Mon Sep 17 00:00:00 2001
From: Eclair Ji Li <eclairjili@ajisin.local>
Date: Thu, 4 Jun 2026 13:29:54 +0800
Subject: [PATCH 2/2] Remove dead-code stub in laz_detail()

The breadcrumb-scan loop body was just `pass` and did nothing; the real breadcrumb/title extraction happens lower in the function. No behavior change.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 agent-workspace/agent_helpers.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/agent-workspace/agent_helpers.py b/agent-workspace/agent_helpers.py
index 414e4a04..e74d13ca 100644
--- a/agent-workspace/agent_helpers.py
+++ b/agent-workspace/agent_helpers.py
@@ -95,10 +95,6 @@ def laz_detail():
     d = {"title": "", "brand": "", "pack_size": "", "origin": "",
          "ptype": "", "price": "", "soldby": "", "breadcrumb": ""}
     import re
-    # breadcrumb line contains tab-joined path ending in the title
-    for l in lines:
-        if "Groceries" in l and "\t" not in l and len(l) > 20:
-            pass
     # title: the heading repeats right after breadcrumb; take the longest
     # line that looks like a product title
     def nextval(i):