Skip to content

Commit 6e5eb95

Browse files
Centralize retry and pagination fetch defaults
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent 558832b commit 6e5eb95

16 files changed

Lines changed: 330 additions & 80 deletions

CONTRIBUTING.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ This runs lint, format checks, compile checks, tests, and package build.
9898
- `tests/test_extension_create_helper_usage.py` (extension create-input normalization helper usage enforcement),
9999
- `tests/test_extract_payload_helper_usage.py` (extract start-payload helper usage enforcement),
100100
- `tests/test_guardrail_ast_utils.py` (shared AST guard utility contract),
101+
- `tests/test_job_fetch_helper_boundary.py` (centralization boundary enforcement for retry/paginated-fetch helper primitives),
102+
- `tests/test_job_fetch_helper_usage.py` (shared retry/paginated-fetch defaults helper usage enforcement),
101103
- `tests/test_job_pagination_helper_usage.py` (shared scrape/crawl pagination helper usage enforcement),
102104
- `tests/test_job_start_payload_helper_usage.py` (shared scrape/crawl start-payload helper usage enforcement),
103105
- `tests/test_job_wait_helper_boundary.py` (centralization boundary enforcement for wait-for-job helper primitives),

hyperbrowser/client/managers/async_manager/crawl.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
from hyperbrowser.models.consts import POLLING_ATTEMPTS
44
from ...polling import (
55
build_fetch_operation_name,
6-
collect_paginated_results_async,
76
poll_until_terminal_status_async,
8-
retry_operation_async,
7+
)
8+
from ..job_fetch_utils import (
9+
collect_paginated_results_with_defaults_async,
10+
retry_operation_with_defaults_async,
911
)
1012
from ..page_params_utils import build_page_batch_params
1113
from ..job_pagination_utils import (
@@ -97,11 +99,9 @@ async def start_and_wait(
9799
)
98100

99101
if not return_all_pages:
100-
return await retry_operation_async(
102+
return await retry_operation_with_defaults_async(
101103
operation_name=build_fetch_operation_name(operation_name),
102104
operation=lambda: self.get(job_id),
103-
max_attempts=POLLING_ATTEMPTS,
104-
retry_delay_seconds=0.5,
105105
)
106106

107107
job_response = initialize_job_paginated_response(
@@ -111,7 +111,7 @@ async def start_and_wait(
111111
total_counter_alias="totalCrawledPages",
112112
)
113113

114-
await collect_paginated_results_async(
114+
await collect_paginated_results_with_defaults_async(
115115
operation_name=operation_name,
116116
get_next_page=lambda page: self.get(
117117
job_start_resp.job_id,
@@ -131,8 +131,6 @@ async def start_and_wait(
131131
total_counter_attr="total_crawled_pages",
132132
),
133133
max_wait_seconds=max_wait_seconds,
134-
max_attempts=POLLING_ATTEMPTS,
135-
retry_delay_seconds=0.5,
136134
)
137135

138136
return job_response

hyperbrowser/client/managers/async_manager/scrape.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
from hyperbrowser.models.consts import POLLING_ATTEMPTS
44
from ...polling import (
55
build_fetch_operation_name,
6-
collect_paginated_results_async,
76
poll_until_terminal_status_async,
8-
retry_operation_async,
7+
)
8+
from ..job_fetch_utils import (
9+
collect_paginated_results_with_defaults_async,
10+
retry_operation_with_defaults_async,
911
)
1012
from ..page_params_utils import build_page_batch_params
1113
from ..job_pagination_utils import (
@@ -107,11 +109,9 @@ async def start_and_wait(
107109
)
108110

109111
if not return_all_pages:
110-
return await retry_operation_async(
112+
return await retry_operation_with_defaults_async(
111113
operation_name=build_fetch_operation_name(operation_name),
112114
operation=lambda: self.get(job_id),
113-
max_attempts=POLLING_ATTEMPTS,
114-
retry_delay_seconds=0.5,
115115
)
116116

117117
job_response = initialize_job_paginated_response(
@@ -121,7 +121,7 @@ async def start_and_wait(
121121
total_counter_alias="totalScrapedPages",
122122
)
123123

124-
await collect_paginated_results_async(
124+
await collect_paginated_results_with_defaults_async(
125125
operation_name=operation_name,
126126
get_next_page=lambda page: self.get(
127127
job_id,
@@ -141,8 +141,6 @@ async def start_and_wait(
141141
total_counter_attr="total_scraped_pages",
142142
),
143143
max_wait_seconds=max_wait_seconds,
144-
max_attempts=POLLING_ATTEMPTS,
145-
retry_delay_seconds=0.5,
146144
)
147145

148146
return job_response

hyperbrowser/client/managers/async_manager/web/batch_fetch.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,13 @@
1616
build_paginated_page_merge_callback,
1717
initialize_paginated_job_response,
1818
)
19+
from ...job_fetch_utils import (
20+
collect_paginated_results_with_defaults_async,
21+
retry_operation_with_defaults_async,
22+
)
1923
from ....polling import (
2024
build_fetch_operation_name,
21-
collect_paginated_results_async,
2225
poll_until_terminal_status_async,
23-
retry_operation_async,
2426
)
2527
from ...response_utils import parse_response_model
2628
from ...start_job_utils import build_started_job_context
@@ -94,11 +96,9 @@ async def start_and_wait(
9496
)
9597

9698
if not return_all_pages:
97-
return await retry_operation_async(
99+
return await retry_operation_with_defaults_async(
98100
operation_name=build_fetch_operation_name(operation_name),
99101
operation=lambda: self.get(job_id),
100-
max_attempts=POLLING_ATTEMPTS,
101-
retry_delay_seconds=0.5,
102102
)
103103

104104
job_response = initialize_paginated_job_response(
@@ -107,7 +107,7 @@ async def start_and_wait(
107107
status=job_status,
108108
)
109109

110-
await collect_paginated_results_async(
110+
await collect_paginated_results_with_defaults_async(
111111
operation_name=operation_name,
112112
get_next_page=lambda page: self.get(
113113
job_id,
@@ -126,8 +126,6 @@ async def start_and_wait(
126126
job_response=job_response,
127127
),
128128
max_wait_seconds=max_wait_seconds,
129-
max_attempts=POLLING_ATTEMPTS,
130-
retry_delay_seconds=0.5,
131129
)
132130

133131
return job_response

hyperbrowser/client/managers/async_manager/web/crawl.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,13 @@
1616
build_paginated_page_merge_callback,
1717
initialize_paginated_job_response,
1818
)
19+
from ...job_fetch_utils import (
20+
collect_paginated_results_with_defaults_async,
21+
retry_operation_with_defaults_async,
22+
)
1923
from ....polling import (
2024
build_fetch_operation_name,
21-
collect_paginated_results_async,
2225
poll_until_terminal_status_async,
23-
retry_operation_async,
2426
)
2527
from ...response_utils import parse_response_model
2628
from ...start_job_utils import build_started_job_context
@@ -92,11 +94,9 @@ async def start_and_wait(
9294
)
9395

9496
if not return_all_pages:
95-
return await retry_operation_async(
97+
return await retry_operation_with_defaults_async(
9698
operation_name=build_fetch_operation_name(operation_name),
9799
operation=lambda: self.get(job_id),
98-
max_attempts=POLLING_ATTEMPTS,
99-
retry_delay_seconds=0.5,
100100
)
101101

102102
job_response = initialize_paginated_job_response(
@@ -105,7 +105,7 @@ async def start_and_wait(
105105
status=job_status,
106106
)
107107

108-
await collect_paginated_results_async(
108+
await collect_paginated_results_with_defaults_async(
109109
operation_name=operation_name,
110110
get_next_page=lambda page: self.get(
111111
job_id,
@@ -124,8 +124,6 @@ async def start_and_wait(
124124
job_response=job_response,
125125
),
126126
max_wait_seconds=max_wait_seconds,
127-
max_attempts=POLLING_ATTEMPTS,
128-
retry_delay_seconds=0.5,
129127
)
130128

131129
return job_response
hyperbrowser/client/managers/job_fetch_utils.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
from typing import Awaitable, Callable, Optional, TypeVar
2+
3+
from hyperbrowser.models.consts import POLLING_ATTEMPTS
4+
5+
from ..polling import (
6+
collect_paginated_results,
7+
collect_paginated_results_async,
8+
retry_operation,
9+
retry_operation_async,
10+
)
11+
12+
T = TypeVar("T")
13+
R = TypeVar("R")
14+
15+
16+
def retry_operation_with_defaults(
    *,
    operation_name: str,
    operation: Callable[[], T],
) -> T:
    """Run ``operation`` through ``retry_operation`` with the shared fetch defaults.

    The retry budget is fixed here so call sites do not repeat it:
    ``max_attempts`` is ``POLLING_ATTEMPTS`` and ``retry_delay_seconds`` is 0.5.

    Args:
        operation_name: Name forwarded unchanged to ``retry_operation``.
        operation: Zero-argument callable forwarded unchanged to
            ``retry_operation``.

    Returns:
        Whatever ``retry_operation`` returns for the given operation.
    """
    result = retry_operation(
        operation=operation,
        operation_name=operation_name,
        # Shared defaults, previously spelled out at every call site.
        retry_delay_seconds=0.5,
        max_attempts=POLLING_ATTEMPTS,
    )
    return result
27+
28+
29+
async def retry_operation_with_defaults_async(
    *,
    operation_name: str,
    operation: Callable[[], Awaitable[T]],
) -> T:
    """Await ``retry_operation_async`` with the shared fetch defaults.

    The retry budget is fixed here so call sites do not repeat it:
    ``max_attempts`` is ``POLLING_ATTEMPTS`` and ``retry_delay_seconds`` is 0.5.

    Args:
        operation_name: Name forwarded unchanged to ``retry_operation_async``.
        operation: Zero-argument callable returning an awaitable, forwarded
            unchanged to ``retry_operation_async``.

    Returns:
        The awaited result of ``retry_operation_async``.
    """
    result = await retry_operation_async(
        operation=operation,
        operation_name=operation_name,
        # Shared defaults, previously spelled out at every call site.
        retry_delay_seconds=0.5,
        max_attempts=POLLING_ATTEMPTS,
    )
    return result
40+
41+
42+
def collect_paginated_results_with_defaults(
    *,
    operation_name: str,
    get_next_page: Callable[[int], R],
    get_current_page_batch: Callable[[R], int],
    get_total_page_batches: Callable[[R], int],
    on_page_success: Callable[[R], None],
    max_wait_seconds: Optional[float],
) -> None:
    """Delegate to ``collect_paginated_results`` with the shared fetch defaults.

    Every argument is forwarded unchanged. This wrapper only supplies the
    retry settings: ``max_attempts`` is ``POLLING_ATTEMPTS`` and
    ``retry_delay_seconds`` is 0.5.

    Args:
        operation_name: Name forwarded to ``collect_paginated_results``.
        get_next_page: Callable taking an ``int`` and returning a page
            response.
        get_current_page_batch: Callable mapping a page response to an ``int``.
        get_total_page_batches: Callable mapping a page response to an ``int``.
        on_page_success: Callable invoked with a page response; returns
            nothing.
        max_wait_seconds: Optional wait limit forwarded as-is (may be
            ``None``).
    """
    collect_paginated_results(
        # Shared defaults, previously spelled out at every call site.
        max_attempts=POLLING_ATTEMPTS,
        retry_delay_seconds=0.5,
        # Caller-supplied arguments, passed straight through.
        max_wait_seconds=max_wait_seconds,
        operation_name=operation_name,
        get_next_page=get_next_page,
        get_current_page_batch=get_current_page_batch,
        get_total_page_batches=get_total_page_batches,
        on_page_success=on_page_success,
    )
61+
62+
63+
async def collect_paginated_results_with_defaults_async(
    *,
    operation_name: str,
    get_next_page: Callable[[int], Awaitable[R]],
    get_current_page_batch: Callable[[R], int],
    get_total_page_batches: Callable[[R], int],
    on_page_success: Callable[[R], None],
    max_wait_seconds: Optional[float],
) -> None:
    """Await ``collect_paginated_results_async`` with the shared fetch defaults.

    Every argument is forwarded unchanged. This wrapper only supplies the
    retry settings: ``max_attempts`` is ``POLLING_ATTEMPTS`` and
    ``retry_delay_seconds`` is 0.5.

    Args:
        operation_name: Name forwarded to ``collect_paginated_results_async``.
        get_next_page: Callable taking an ``int`` and returning an awaitable
            page response.
        get_current_page_batch: Callable mapping a page response to an ``int``.
        get_total_page_batches: Callable mapping a page response to an ``int``.
        on_page_success: Callable invoked with a page response; returns
            nothing.
        max_wait_seconds: Optional wait limit forwarded as-is (may be
            ``None``).
    """
    await collect_paginated_results_async(
        # Shared defaults, previously spelled out at every call site.
        max_attempts=POLLING_ATTEMPTS,
        retry_delay_seconds=0.5,
        # Caller-supplied arguments, passed straight through.
        max_wait_seconds=max_wait_seconds,
        operation_name=operation_name,
        get_next_page=get_next_page,
        get_current_page_batch=get_current_page_batch,
        get_total_page_batches=get_total_page_batches,
        on_page_success=on_page_success,
    )

hyperbrowser/client/managers/sync_manager/crawl.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
from hyperbrowser.models.consts import POLLING_ATTEMPTS
44
from ...polling import (
55
build_fetch_operation_name,
6-
collect_paginated_results,
76
poll_until_terminal_status,
8-
retry_operation,
7+
)
8+
from ..job_fetch_utils import (
9+
collect_paginated_results_with_defaults,
10+
retry_operation_with_defaults,
911
)
1012
from ..page_params_utils import build_page_batch_params
1113
from ..job_pagination_utils import (
@@ -97,11 +99,9 @@ def start_and_wait(
9799
)
98100

99101
if not return_all_pages:
100-
return retry_operation(
102+
return retry_operation_with_defaults(
101103
operation_name=build_fetch_operation_name(operation_name),
102104
operation=lambda: self.get(job_id),
103-
max_attempts=POLLING_ATTEMPTS,
104-
retry_delay_seconds=0.5,
105105
)
106106

107107
job_response = initialize_job_paginated_response(
@@ -111,7 +111,7 @@ def start_and_wait(
111111
total_counter_alias="totalCrawledPages",
112112
)
113113

114-
collect_paginated_results(
114+
collect_paginated_results_with_defaults(
115115
operation_name=operation_name,
116116
get_next_page=lambda page: self.get(
117117
job_start_resp.job_id,
@@ -131,8 +131,6 @@ def start_and_wait(
131131
total_counter_attr="total_crawled_pages",
132132
),
133133
max_wait_seconds=max_wait_seconds,
134-
max_attempts=POLLING_ATTEMPTS,
135-
retry_delay_seconds=0.5,
136134
)
137135

138136
return job_response

hyperbrowser/client/managers/sync_manager/scrape.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
from hyperbrowser.models.consts import POLLING_ATTEMPTS
44
from ...polling import (
55
build_fetch_operation_name,
6-
collect_paginated_results,
76
poll_until_terminal_status,
8-
retry_operation,
7+
)
8+
from ..job_fetch_utils import (
9+
collect_paginated_results_with_defaults,
10+
retry_operation_with_defaults,
911
)
1012
from ..page_params_utils import build_page_batch_params
1113
from ..job_pagination_utils import (
@@ -105,11 +107,9 @@ def start_and_wait(
105107
)
106108

107109
if not return_all_pages:
108-
return retry_operation(
110+
return retry_operation_with_defaults(
109111
operation_name=build_fetch_operation_name(operation_name),
110112
operation=lambda: self.get(job_id),
111-
max_attempts=POLLING_ATTEMPTS,
112-
retry_delay_seconds=0.5,
113113
)
114114

115115
job_response = initialize_job_paginated_response(
@@ -119,7 +119,7 @@ def start_and_wait(
119119
total_counter_alias="totalScrapedPages",
120120
)
121121

122-
collect_paginated_results(
122+
collect_paginated_results_with_defaults(
123123
operation_name=operation_name,
124124
get_next_page=lambda page: self.get(
125125
job_id,
@@ -139,8 +139,6 @@ def start_and_wait(
139139
total_counter_attr="total_scraped_pages",
140140
),
141141
max_wait_seconds=max_wait_seconds,
142-
max_attempts=POLLING_ATTEMPTS,
143-
retry_delay_seconds=0.5,
144142
)
145143

146144
return job_response

0 commit comments

Comments
 (0)