diff --git a/converters/dbt/pyproject.toml b/converters/dbt/pyproject.toml
index f1bb3b0..abc40a8 100644
--- a/converters/dbt/pyproject.toml
+++ b/converters/dbt/pyproject.toml
@@ -26,6 +26,7 @@ packages = ["src/osi_dbt"]
 
 [tool.uv]
 dev-dependencies = [
+    "jsonschema>=4.0",
     "pytest>=8.0",
     "syrupy>=4.0",
 ]
diff --git a/converters/dbt/src/osi_dbt/msi_to_osi.py b/converters/dbt/src/osi_dbt/msi_to_osi.py
index 6a7dbec..f0f4cf9 100644
--- a/converters/dbt/src/osi_dbt/msi_to_osi.py
+++ b/converters/dbt/src/osi_dbt/msi_to_osi.py
@@ -105,7 +105,6 @@ def convert(
         return ConverterResult(
             output=OSIDocument(
                 version="0.2.0.dev0",
-                dialects=[self._dialect],
                 semantic_model=[
                     OSISemanticModel(
                         name=osi_model_name,
diff --git a/converters/dbt/tests/test_msi_to_osi.py b/converters/dbt/tests/test_msi_to_osi.py
index 686106b..4a082fc 100644
--- a/converters/dbt/tests/test_msi_to_osi.py
+++ b/converters/dbt/tests/test_msi_to_osi.py
@@ -285,7 +285,6 @@ def test_default_dialect_is_ansi_sql(self) -> None:
         )
         result = MSIToOSIConverter().convert(_manifest(semantic_models=[sm])).output
 
-        assert result.dialects == [OSIDialect.ANSI_SQL]
         assert _fields(result)[0].expression.dialects[0].dialect == OSIDialect.ANSI_SQL
 
     def test_configurable_dialect(self) -> None:
@@ -295,7 +294,6 @@ def test_configurable_dialect(self) -> None:
         )
         result = MSIToOSIConverter(dialect=OSIDialect.SNOWFLAKE).convert(_manifest(semantic_models=[sm])).output
 
-        assert result.dialects == [OSIDialect.SNOWFLAKE]
         assert _fields(result)[0].expression.dialects[0].dialect == OSIDialect.SNOWFLAKE
 
 
diff --git a/converters/dbt/tests/test_schema_conformance.py b/converters/dbt/tests/test_schema_conformance.py
new file mode 100644
index 0000000..ce898a5
--- /dev/null
+++ b/converters/dbt/tests/test_schema_conformance.py
@@ -0,0 +1,97 @@
+"""Regression guard: converter output must validate against the core OSI schema."""
+
+import json
+import sys
+from pathlib import Path
+
+import pytest
+import yaml
+
+from osi import OSIDialect
+from osi_dbt.msi_to_osi import MSIToOSIConverter
+
+from metricflow_semantic_interfaces.implementations.semantic_model import PydanticNodeRelation
+from metricflow_semantic_interfaces.test_utils import semantic_model_with_guaranteed_meta
+from metricflow_semantic_interfaces.type_enums import (
+    AggregationType,
+    DimensionType,
+    EntityType,
+    TimeGranularity,
+)
+
+from tests.helpers import _dimension, _entity, _manifest, _measure, _simple_metric
+
+# This file lives in converters/dbt/tests/, so parents[3] is the repository root.
+REPO_ROOT = Path(__file__).resolve().parents[3]
+# Reuse the repository's canonical validator instead of reimplementing schema checks.
+sys.path.insert(0, str(REPO_ROOT / "validation"))
+from validate import validate_schema  # noqa: E402
+
+SCHEMA_PATH = REPO_ROOT / "core-spec" / "osi-schema.json"
+
+
+def _representative_manifest():
+    """Build a representative manifest for the schema-conformance check.
+
+    The manifest exercises datasets, keys, dimensions, measures, relationships
+    and metrics, so the emitted document covers most of the core schema.
+    """
+    orders = semantic_model_with_guaranteed_meta(
+        name="orders",
+        description="Order facts",
+        node_relation=PydanticNodeRelation(schema_name="analytics", alias="orders"),
+        entities=[
+            _entity("order_id", entity_type=EntityType.PRIMARY),
+            _entity("customer", entity_type=EntityType.FOREIGN, expr="customer_id"),
+        ],
+        dimensions=[
+            _dimension("ds", dim_type=DimensionType.TIME, granularity=TimeGranularity.DAY),
+            _dimension("status", description="Order status", label="Status"),
+        ],
+        measures=[
+            _measure("revenue", agg=AggregationType.SUM, expr="amount"),
+            _measure("order_count", agg=AggregationType.COUNT, expr="order_id"),
+        ],
+    )
+    customers = semantic_model_with_guaranteed_meta(
+        name="customers",
+        description="Customer dimension",
+        node_relation=PydanticNodeRelation(schema_name="analytics", alias="customers"),
+        entities=[
+            _entity("customer", entity_type=EntityType.PRIMARY, expr="customer_id"),
+            _entity("email", entity_type=EntityType.UNIQUE),
+        ],
+        dimensions=[_dimension("country")],
+    )
+    return _manifest(
+        semantic_models=[orders, customers],
+        metrics=[_simple_metric("revenue", "revenue"), _simple_metric("order_count", "order_count")],
+    )
+
+
+def _load_schema() -> dict:
+    """Return the core OSI JSON schema parsed from ``SCHEMA_PATH``."""
+    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
+    return json.loads(SCHEMA_PATH.read_text(encoding="utf-8"))
+
+
+@pytest.mark.parametrize("dialect", [OSIDialect.ANSI_SQL, OSIDialect.SNOWFLAKE])
+def test_converter_output_conforms_to_core_schema(dialect: OSIDialect) -> None:
+    """Converter output must validate against the core schema as both YAML and JSON."""
+    document = (
+        MSIToOSIConverter(dialect=dialect)
+        .convert(_representative_manifest(), osi_model_name="conformance")
+        .output
+    )
+    schema = _load_schema()
+
+    # Both public serializations (CLI uses to_osi_yaml) must be schema-conformant.
+    for serialization, data in (
+        ("yaml", yaml.safe_load(document.to_osi_yaml())),
+        ("json", json.loads(document.to_osi_json())),
+    ):
+        errors = validate_schema(data, schema)
+        assert errors == [], (
+            f"{dialect.value} converter output ({serialization}) is not schema-conformant:\n"
+            + "\n".join(errors)
+        )
diff --git a/python/src/osi/models.py b/python/src/osi/models.py
index 766b6e0..703e058 100644
--- a/python/src/osi/models.py
+++ b/python/src/osi/models.py
@@ -149,8 +149,6 @@ class OSIDocument(BaseModel):
     model_config = ConfigDict(frozen=True)
 
     version: str = "0.2.0.dev0"
-    dialects: Optional[list[OSIDialect]] = None
-    vendors: Optional[list[OSIVendor]] = None
     semantic_model: list[OSISemanticModel]
 
     def to_osi_yaml(self, **kwargs: Any) -> str: