77import abc
88
99from dataclasses import dataclass
10+
11+ from pydantic_core .core_schema import ValidationInfo
1012from sqlglot import exp
1113
1214from sqlmesh .core .console import Console
1315from sqlmesh .core .dialect import schema_
14- from sqlmesh .utils .pydantic import PydanticModel
15- from sqlmesh .core .environment import Environment , EnvironmentStatements
16+ from sqlmesh .utils .pydantic import PydanticModel , field_validator
17+ from sqlmesh .core .environment import Environment , EnvironmentStatements , EnvironmentNamingInfo
1618from sqlmesh .utils .errors import SQLMeshError
17- from sqlmesh .core .snapshot import Snapshot , SnapshotEvaluator
19+ from sqlmesh .core .snapshot import (
20+ Snapshot ,
21+ SnapshotEvaluator ,
22+ SnapshotId ,
23+ SnapshotTableCleanupTask ,
24+ SnapshotTableInfo ,
25+ )
1826
1927if t .TYPE_CHECKING :
2028 from sqlmesh .core .engine_adapter .base import EngineAdapter
21- from sqlmesh .core .state_sync .base import Versions , ExpiredSnapshotBatch , StateReader , StateSync
29+ from sqlmesh .core .state_sync .base import Versions , StateReader , StateSync
2230
2331logger = logging .getLogger (__name__ )
2432
@@ -219,6 +227,109 @@ def __iter__(self) -> t.Iterator[StateStreamContents]:
219227 return _StateStream ()
220228
221229
class ExpiredBatchRange(PydanticModel):
    """A start/end pair delimiting one batch of expired snapshot rows.

    ``start`` is always a ``RowBoundary``. ``end`` is either another
    ``RowBoundary`` or a ``LimitBoundary`` that carries a batch size instead
    of a concrete row position.
    """

    start: RowBoundary
    end: t.Union[RowBoundary, LimitBoundary]

    @classmethod
    def init_batch_range(cls, batch_size: int) -> ExpiredBatchRange:
        """Build the range for a first batch.

        Args:
            batch_size: Value stored in the ``LimitBoundary`` used as ``end``.

        Returns:
            A range starting at ``RowBoundary.lowest_boundary()`` and ending
            at a ``LimitBoundary`` with the given batch size.
        """
        # Construct through ``cls`` so subclasses get instances of their own type.
        return cls(
            start=RowBoundary.lowest_boundary(),
            end=LimitBoundary(batch_size=batch_size),
        )

    @classmethod
    def all_batch_range(cls) -> ExpiredBatchRange:
        """Build the range spanning the lowest to the highest row boundary."""
        return cls(
            start=RowBoundary.lowest_boundary(),
            end=RowBoundary.highest_boundary(),
        )
247+
248+
class RowBoundary(PydanticModel):
    """A row position described by ``updated_ts``, ``name`` and ``identifier``."""

    # Timestamp in epoch milliseconds (the unit used by ``highest_boundary``).
    updated_ts: int
    name: str
    identifier: str

    @classmethod
    def lowest_boundary(cls) -> RowBoundary:
        """Return the boundary with a zero timestamp and empty name/identifier."""
        # Construct through ``cls`` so subclasses get instances of their own type.
        return cls(updated_ts=0, name="", identifier="")

    @classmethod
    def highest_boundary(cls) -> RowBoundary:
        """Return the boundary with the maximum timestamp and empty name/identifier."""
        # 9999-12-31T23:59:59.999Z in epoch milliseconds
        return cls(updated_ts=253_402_300_799_999, name="", identifier="")
277+
278+
279+ #
280+ #
281+ # class EndRowBoundary(RowBoundary):
282+ # @classmethod
283+ # def include_all_boundary(cls) -> EndRowBoundary:
284+ # # 9999-12-31T23:59:59.999Z in epoch milliseconds
285+ # return EndRowBoundary(updated_ts=253_402_300_799_999, name="", identifier="")
286+ #
287+ # def to_start_batch_boundary(self) -> StartRowBoundary:
288+ # return StartRowBoundary(
289+ # updated_ts=self.updated_ts,
290+ # name=self.name,
291+ # identifier=self.identifier,
292+ # )
293+
294+
class LimitBoundary(PydanticModel):
    """A range end expressed as a batch size rather than a row position."""

    batch_size: int

    @classmethod
    def init_batch_boundary(cls, batch_size: int) -> LimitBoundary:
        """Return a limit boundary holding ``batch_size``.

        Args:
            batch_size: Value stored on the returned boundary.
        """
        # Construct through ``cls`` so subclasses get instances of their own type.
        return cls(batch_size=batch_size)
301+
302+
303+ #
304+ # class StartRowBoundary(RowBoundary):
305+ # @classmethod
306+ # def init_batch_boundary(cls) -> StartRowBoundary:
307+ # return StartRowBoundary(updated_ts=0, name="", identifier="")
308+ #
309+
310+
class PromotionResult(PydanticModel):
    """Added and removed snapshot table infos, plus optional naming info for the removals.

    ``removed_environment_naming_info`` may only be set when ``removed`` is
    non-empty; the field validator below enforces this.
    """

    added: t.List[SnapshotTableInfo]
    removed: t.List[SnapshotTableInfo]
    removed_environment_naming_info: t.Optional[EnvironmentNamingInfo]

    @field_validator("removed_environment_naming_info")
    def _validate_removed_environment_naming_info(
        cls, v: t.Optional[EnvironmentNamingInfo], info: ValidationInfo
    ) -> t.Optional[EnvironmentNamingInfo]:
        """Reject naming info that is supplied without any removed entries.

        Raises:
            ValueError: If ``v`` is truthy while ``removed`` is absent from the
                already-validated data or is empty.
        """
        # Accept early when there is nothing to check or removals are present.
        if not v or info.data.get("removed"):
            return v
        raise ValueError("removed_environment_naming_info must be None if removed is empty")
323+
324+
class ExpiredSnapshotBatch(PydanticModel):
    """A batch of expired snapshots to be cleaned up."""

    # Ids of the expired snapshots that belong to this batch.
    expired_snapshot_ids: t.Set[SnapshotId]
    # Table cleanup tasks associated with this batch.
    cleanup_tasks: t.List[SnapshotTableCleanupTask]
    # Start/end range describing which rows this batch covers.
    batch_range: ExpiredBatchRange
331+
332+
222333def iter_expired_snapshot_batches (
223334 state_reader : StateReader ,
224335 * ,
@@ -234,24 +345,29 @@ def iter_expired_snapshot_batches(
234345 ignore_ttl: If True, include snapshots regardless of TTL (only checks if unreferenced).
235346 batch_size: Maximum number of snapshots to fetch per batch.
236347 """
237- from sqlmesh .core .state_sync .base import LowerBatchBoundary
238348
239349 batch_size = batch_size if batch_size is not None else EXPIRED_SNAPSHOT_DEFAULT_BATCH_SIZE
240- batch_boundary = LowerBatchBoundary . init_batch_boundary (batch_size = batch_size )
350+ batch_range = ExpiredBatchRange . init_batch_range (batch_size = batch_size )
241351
242352 while True :
243353 batch = state_reader .get_expired_snapshots (
244354 current_ts = current_ts ,
245355 ignore_ttl = ignore_ttl ,
246- batch_boundary = batch_boundary ,
356+ batch_range = batch_range ,
247357 )
248358
249359 if batch is None :
250360 return
251361
252362 yield batch
253363
254- batch_boundary = batch .batch_boundary .to_lower_batch_boundary (batch_size = batch_size )
364+ assert isinstance (batch .batch_range .end , RowBoundary ), (
365+ "Only RowBoundary is supported for pagination currently"
366+ )
367+ batch_range = ExpiredBatchRange (
368+ start = batch .batch_range .end ,
369+ end = LimitBoundary (batch_size = batch_size ),
370+ )
255371
256372
257373def delete_expired_snapshots (
@@ -286,17 +402,25 @@ def delete_expired_snapshots(
286402 ignore_ttl = ignore_ttl ,
287403 batch_size = batch_size ,
288404 ):
405+ end_info = (
406+ f"updated_ts={ batch .batch_range .end .updated_ts } "
407+ if isinstance (batch .batch_range .end , RowBoundary )
408+ else f"limit={ batch .batch_range .end .batch_size } "
409+ )
289410 logger .info (
290- "Processing batch of size %s and max_updated_ts of %s" ,
411+ "Processing batch of size %s with end %s" ,
291412 len (batch .expired_snapshot_ids ),
292- batch . batch_boundary . updated_ts ,
413+ end_info ,
293414 )
294415 snapshot_evaluator .cleanup (
295416 target_snapshots = batch .cleanup_tasks ,
296417 on_complete = console .update_cleanup_progress if console else None ,
297418 )
298419 state_sync .delete_expired_snapshots (
299- upper_batch_boundary = batch .batch_boundary .to_upper_batch_boundary (),
420+ batch_range = ExpiredBatchRange (
421+ start = RowBoundary .lowest_boundary (),
422+ end = batch .batch_range .end ,
423+ ),
300424 ignore_ttl = ignore_ttl ,
301425 )
302426 logger .info ("Cleaned up expired snapshots batch" )
0 commit comments