From 96fa168a4e19e60b0885d251f6d8d2aeb195fe8c Mon Sep 17 00:00:00 2001 From: Manuel Bellersen Date: Wed, 27 Aug 2025 08:49:45 +0200 Subject: [PATCH] GH-62: refactor(metadata): Rename ColumnProfile to ColumnInformation The Pydantic model `ColumnProfile` and its corresponding field `column_profile` have been renamed to `ColumnInformation` and `columns` respectively. This change improves clarity and consistency. "Information" is a more accurate and general term for the data being stored (name, role, statistics) than "Profile". The field name `columns` is more concise and conventional. All related functions, variables, test fixtures, and test data have been updated to reflect this renaming. No functional changes are introduced. GH-62: refactor: Add support for text column statistics Introduces support for calculating and storing summary statistics for columns with the `text` role. This includes: - Adding a `ColumnStatisticsText` model. - Refactoring `ColumnStatistics` models to use a common `ColumnStatisticsBase` class, reducing code duplication. - Making statistical fields (like min, max, avg) optional to correctly handle columns with only NULL values. - Introducing a `ColumnType` enum to replace string literals for column types, improving type safety and readability. - Updating test fixtures and helpers to align with the new data models and support the `text` role. GH-62: ci: Optimize workflow triggers with path filtering Adds path filters to the pull_request and push triggers for the Python test workflow. This prevents the workflow from running on changes unrelated to the source code, tests, or project configuration (e.g., documentation updates). Additionally, this change adds a `push` trigger to run tests on merges to `main` and `develop`, ensuring the integrity of the primary branches. 
GH-62: refactor: Generalize Parquet serialization and statistics logic Extract the logic for serializing data to Parquet and calculating column statistics into a new, more generic `serialize_dataframe` function in `serialize/parquet.py`. This new function is now used for serializing both getML DataFrames/Views and prediction results. This refactoring improves code reuse and separation of concerns. Key changes: - `serialize_dataframe` accepts callables for saving the Parquet file and retrieving column roles, decoupling it from specific data structures like `getml.DataFrame`. - `serialize_predictions` now creates a `pyarrow.Table` directly from the numpy array and uses the new `serialize_dataframe` function, avoiding the overhead of creating an intermediate `getml.DataFrame`. - Moves shared logic out of `serialize/dataframe_or_view.py`, simplifying it significantly. GH-62: refactor(pydantic): Use `frozen=True` for immutable models Modernize Pydantic model definitions by replacing the `model_config` dictionary with the `frozen=True` class keyword argument. This change simplifies the code, makes it more readable, and aligns with current Pydantic v2 best practices for creating immutable models. No functional changes are introduced. 
--- .github/workflows/python-tests.yml | 15 +- src/getml_io/getml/feature_learning.py | 24 +- src/getml_io/getml/features.py | 7 +- src/getml_io/getml/predictors.py | 28 +- src/getml_io/getml/preprocessors.py | 32 +- src/getml_io/getml/project.py | 10 +- src/getml_io/getml/project_information.py | 8 +- src/getml_io/getml/roles.py | 7 +- src/getml_io/getml/scores.py | 6 +- .../metadata/container_information.py | 8 +- .../metadata/data_model_information.py | 7 +- .../metadata/dataframe_information.py | 110 +-- src/getml_io/metadata/pipeline_information.py | 7 +- .../metadata/placeholder_information.py | 11 +- src/getml_io/serialize/dataframe_or_view.py | 157 +-- src/getml_io/serialize/parquet.py | 209 ++++ src/getml_io/serialize/pipeline.py | 25 +- src/getml_io/utils/convert.py | 2 +- tests/integration/assertions.py | 30 +- tests/integration/data/cora/cora.py | 2 +- tests/integration/data/datasets.py | 7 +- tests/integration/data/getmlproject.py | 11 +- .../data/loans/expected.container.json | 10 +- .../data/loans/expected.pipeline.json | 354 +++---- tests/integration/data/loans/loans.py | 2 +- .../data/numerical/expected.container.json | 6 +- .../data/numerical/expected.pipeline.json | 428 ++++----- tests/integration/data/numerical/numerical.py | 2 +- .../data/robot/expected.container.json | 8 +- .../data/robot/expected.pipeline.json | 898 +++++++++--------- tests/integration/data/robot/robot.py | 2 +- tests/unit/conftest.py | 326 +++++-- .../metadata/test_container_information.py | 14 +- .../metadata/test_pipeline_information.py | 24 +- tests/unit/serialize/test_container.py | 88 +- .../unit/serialize/test_dataframe_or_view.py | 132 +-- tests/unit/serialize/test_parquet.py | 157 +++ tests/unit/serialize/test_pipeline.py | 6 +- tests/unit/types.py | 6 +- 39 files changed, 1680 insertions(+), 1506 deletions(-) create mode 100644 src/getml_io/serialize/parquet.py create mode 100644 tests/unit/serialize/test_parquet.py diff --git a/.github/workflows/python-tests.yml 
b/.github/workflows/python-tests.yml index cfd58ab..4eb2b3c 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -1,7 +1,20 @@ name: Python Linting, Formatting, Testing, Coverage on: pull_request: - branches: [main, develop] + paths: + - 'src/**' + - 'tests/**' + - '.github/workflows/**' + - 'pyproject.toml' + push: + branches: + - main + - develop + paths: + - 'src/**' + - 'tests/**' + - '.github/workflows/**' + - 'pyproject.toml' jobs: test: runs-on: ubuntu-latest diff --git a/src/getml_io/getml/feature_learning.py b/src/getml_io/getml/feature_learning.py index 329d9aa..9084720 100644 --- a/src/getml_io/getml/feature_learning.py +++ b/src/getml_io/getml/feature_learning.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Set as AbstractSet -from typing import Annotated, ClassVar, Literal +from typing import Annotated, Literal from getml.feature_learning.aggregations.types import ( FastPropAggregations, @@ -11,12 +11,10 @@ CrossEntropyLossType, SquareLossType, ) -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, Field -class FastProp(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class FastProp(BaseModel, frozen=True): aggregation: AbstractSet[FastPropAggregations] delta_t: float loss_function: CrossEntropyLossType | SquareLossType | None @@ -31,9 +29,7 @@ class FastProp(BaseModel): type: Literal["fast_prop"] = "fast_prop" -class Fastboost(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class Fastboost(BaseModel, frozen=True): gamma: float loss_function: CrossEntropyLossType | SquareLossType | None max_depth: int @@ -48,9 +44,7 @@ class Fastboost(BaseModel): type: Literal["fastboost"] = "fastboost" -class Multirel(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class Multirel(BaseModel, frozen=True): aggregation: AbstractSet[MultirelAggregations] allow_sets: bool delta_t: 
float @@ -75,9 +69,7 @@ class Multirel(BaseModel): type: Literal["multirel"] = "multirel" -class Relboost(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class Relboost(BaseModel, frozen=True): allow_null_weights: bool delta_t: float gamma: float @@ -98,9 +90,7 @@ class Relboost(BaseModel): type: Literal["relboost"] = "relboost" -class RelMT(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class RelMT(BaseModel, frozen=True): allow_avg: bool delta_t: float gamma: float diff --git a/src/getml_io/getml/features.py b/src/getml_io/getml/features.py index cc6793f..102f324 100644 --- a/src/getml_io/getml/features.py +++ b/src/getml_io/getml/features.py @@ -1,12 +1,9 @@ from collections.abc import Mapping -from typing import ClassVar -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel -class Feature(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class Feature(BaseModel, frozen=True): name: str index: int target: str diff --git a/src/getml_io/getml/predictors.py b/src/getml_io/getml/predictors.py index a557ad1..e2c23d0 100644 --- a/src/getml_io/getml/predictors.py +++ b/src/getml_io/getml/predictors.py @@ -1,29 +1,23 @@ from __future__ import annotations -from typing import Annotated, ClassVar, Literal +from typing import Annotated, Literal -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, Field -class LinearRegression(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class LinearRegression(BaseModel, frozen=True): learning_rate: float reg_lambda: float type: Literal["linear_regression"] = "linear_regression" -class LogisticRegression(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class LogisticRegression(BaseModel, frozen=True): learning_rate: float reg_lambda: float type: Literal["logistic_regression"] = "logistic_regression" -class ScaleGBMClassifier(BaseModel): - 
model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class ScaleGBMClassifier(BaseModel, frozen=True): colsample_bylevel: float colsample_bytree: float early_stopping_rounds: int @@ -41,9 +35,7 @@ class ScaleGBMClassifier(BaseModel): type: Literal["scale_gbm_classifier"] = "scale_gbm_classifier" -class ScaleGBMRegressor(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class ScaleGBMRegressor(BaseModel, frozen=True): colsample_bylevel: float colsample_bytree: float early_stopping_rounds: int @@ -61,9 +53,7 @@ class ScaleGBMRegressor(BaseModel): type: Literal["scale_gbm_regressor"] = "scale_gbm_regressor" -class XGBoostClassifier(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class XGBoostClassifier(BaseModel, frozen=True): booster: str colsample_bylevel: float colsample_bytree: float @@ -90,9 +80,7 @@ class XGBoostClassifier(BaseModel): type: Literal["xgboost_classifier"] = "xgboost_classifier" -class XGBoostRegressor(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class XGBoostRegressor(BaseModel, frozen=True): booster: str colsample_bylevel: float colsample_bytree: float diff --git a/src/getml_io/getml/preprocessors.py b/src/getml_io/getml/preprocessors.py index 56e9bc8..d08efa9 100644 --- a/src/getml_io/getml/preprocessors.py +++ b/src/getml_io/getml/preprocessors.py @@ -1,45 +1,35 @@ from __future__ import annotations from collections.abc import Set as AbstractSet -from typing import Annotated, ClassVar, Literal +from typing import Annotated, Literal from getml.feature_learning.aggregations.types import MappingAggregations -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, Field -class CategoryTrimmer(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class CategoryTrimmer(BaseModel, frozen=True): max_num_categories: int min_freq: int type: Literal["category_trimmer"] = "category_trimmer" -class 
EmailDomain(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class EmailDomain(BaseModel, frozen=True): type: Literal["email_domain"] = "email_domain" -class Imputation(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class Imputation(BaseModel, frozen=True): add_dummies: bool type: Literal["imputation"] = "imputation" -class Mapping(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class Mapping(BaseModel, frozen=True): aggregation: AbstractSet[MappingAggregations] min_freq: int multithreading: bool type: Literal["mapping"] = "mapping" -class Seasonal(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class Seasonal(BaseModel, frozen=True): disable_year: bool disable_month: bool disable_weekday: bool @@ -48,18 +38,14 @@ class Seasonal(BaseModel): type: Literal["seasonal"] = "seasonal" -class Substring(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class Substring(BaseModel, frozen=True): begin: int length: int unit: str type: Literal["substring"] = "substring" -class TextFieldSplitter(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class TextFieldSplitter(BaseModel, frozen=True): type: Literal["text_field_splitter"] = "text_field_splitter" diff --git a/src/getml_io/getml/project.py b/src/getml_io/getml/project.py index 5a336b9..56c0004 100644 --- a/src/getml_io/getml/project.py +++ b/src/getml_io/getml/project.py @@ -1,11 +1,10 @@ import logging from collections.abc import Generator from contextlib import contextmanager -from typing import ClassVar from getml.data import Container from getml.pipeline import Pipeline -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel from getml_io.getml.exception import ( PipelineNotFoundError, @@ -25,12 +24,7 @@ logger: logging.Logger = logging.getLogger(__name__) -class Project(BaseModel): - model_config: ClassVar[ConfigDict] = 
ConfigDict( - arbitrary_types_allowed=True, - frozen=True, - ) - +class Project(BaseModel, frozen=True, arbitrary_types_allowed=True): name: str pipeline: Pipeline container: Container diff --git a/src/getml_io/getml/project_information.py b/src/getml_io/getml/project_information.py index cd45415..7e3a1ff 100644 --- a/src/getml_io/getml/project_information.py +++ b/src/getml_io/getml/project_information.py @@ -1,11 +1,7 @@ -from typing import ClassVar +from pydantic import BaseModel -from pydantic import BaseModel, ConfigDict - - -class ProjectInformation(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) +class ProjectInformation(BaseModel, frozen=True): project_name: str pipeline_id: str container_id: str diff --git a/src/getml_io/getml/roles.py b/src/getml_io/getml/roles.py index 36d4e2f..b3d7aac 100644 --- a/src/getml_io/getml/roles.py +++ b/src/getml_io/getml/roles.py @@ -2,10 +2,9 @@ from collections.abc import Sequence from enum import Enum -from typing import ClassVar from getml.data import roles -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel class Role(str, Enum): @@ -19,9 +18,7 @@ class Role(str, Enum): UNUSED_STRING = roles.unused_string -class Roles(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class Roles(BaseModel, frozen=True): categorical: Sequence[str] join_key: Sequence[str] numerical: Sequence[str] diff --git a/src/getml_io/getml/scores.py b/src/getml_io/getml/scores.py index 20c8df1..981fa8f 100644 --- a/src/getml_io/getml/scores.py +++ b/src/getml_io/getml/scores.py @@ -5,20 +5,20 @@ from pydantic import BaseModel, Field -class _Score(BaseModel): +class _Score(BaseModel, frozen=True): date_time: datetime set_used: str target: str -class ClassificationScore(_Score): +class ClassificationScore(_Score, frozen=True): accuracy: float auc: float cross_entropy: float type: Literal["classification"] = "classification" -class RegressionScore(_Score): +class 
RegressionScore(_Score, frozen=True): mae: float rmse: float rsquared: float diff --git a/src/getml_io/metadata/container_information.py b/src/getml_io/metadata/container_information.py index 6fbc64b..628965e 100644 --- a/src/getml_io/metadata/container_information.py +++ b/src/getml_io/metadata/container_information.py @@ -1,8 +1,6 @@ from __future__ import annotations -from typing import ClassVar - -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel from getml_io.metadata.dataframe_information import ( DataFrameInformation, @@ -10,9 +8,7 @@ ) -class ContainerInformation(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class ContainerInformation(BaseModel, frozen=True): id: str population: DataFrameInformation | None peripheral: DataFrameInformationByName diff --git a/src/getml_io/metadata/data_model_information.py b/src/getml_io/metadata/data_model_information.py index 903d494..7f5d4fe 100644 --- a/src/getml_io/metadata/data_model_information.py +++ b/src/getml_io/metadata/data_model_information.py @@ -1,15 +1,12 @@ from __future__ import annotations from collections.abc import Mapping, Sequence -from typing import ClassVar -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel from getml_io.metadata.placeholder_information import PlaceholderInformation -class DataModelInformation(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class DataModelInformation(BaseModel, frozen=True): population: PlaceholderInformation peripheral: Mapping[str, Sequence[PlaceholderInformation]] diff --git a/src/getml_io/metadata/dataframe_information.py b/src/getml_io/metadata/dataframe_information.py index 5fa952c..76b6560 100644 --- a/src/getml_io/metadata/dataframe_information.py +++ b/src/getml_io/metadata/dataframe_information.py @@ -2,82 +2,78 @@ from collections.abc import Mapping from datetime import datetime +from enum import Enum from pathlib import Path -from typing import 
Annotated, ClassVar, Literal +from typing import Annotated, Literal -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, Field from getml_io.getml.roles import Role -class ColumnStatisticsDouble(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class ColumnStatisticsBase(BaseModel, frozen=True): count: int approx_unique: int - avg: float - min: float - max: float - q25: float - q50: float - q75: float + null_percentage: float | None + + +class ColumnStatisticsDouble(ColumnStatisticsBase, frozen=True): + avg: float | None + min: float | None + max: float | None + q25: float | None + q50: float | None + q75: float | None std: float | None - null_percentage: float column_type: Literal["DOUBLE"] -class ColumnStatisticsVarchar(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - - count: int - approx_unique: int - min: str - max: str - null_percentage: float +class ColumnStatisticsVarchar(ColumnStatisticsBase, frozen=True): + min: str | None + max: str | None column_type: Literal["VARCHAR"] -class ColumnStatisticsNumerical(ColumnStatisticsDouble): +class ColumnStatisticsNumerical(ColumnStatisticsDouble, frozen=True): type: Literal["numerical"] = "numerical" -class ColumnStatisticsTarget(ColumnStatisticsDouble): +class ColumnStatisticsTarget(ColumnStatisticsDouble, frozen=True): type: Literal["target"] = "target" -class ColumnStatisticsTimeStamp(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - - count: int - approx_unique: int - avg: datetime - min: datetime - max: datetime - q25: datetime - q50: datetime - q75: datetime - null_percentage: float +class ColumnStatisticsTimeStamp(ColumnStatisticsBase, frozen=True): + avg: datetime | None + min: datetime | None + max: datetime | None + q25: datetime | None + q50: datetime | None + q75: datetime | None column_type: Literal["TIMESTAMP_NS"] type: Literal["time_stamp"] = "time_stamp" -class 
ColumnStatisticsTimeStampAsFloat(ColumnStatisticsDouble): +class ColumnStatisticsTimeStampAsFloat(ColumnStatisticsDouble, frozen=True): type: Literal["time_stamp_float"] = "time_stamp_float" -class ColumnStatisticsCategorical(ColumnStatisticsVarchar): +class ColumnStatisticsCategorical(ColumnStatisticsVarchar, frozen=True): type: Literal["categorical"] = "categorical" -class ColumnStatisticsJoinKey(ColumnStatisticsVarchar): +class ColumnStatisticsJoinKey(ColumnStatisticsVarchar, frozen=True): type: Literal["join_key"] = "join_key" -class ColumnStatisticsUnusedFloat(ColumnStatisticsDouble): +class ColumnStatisticsText(ColumnStatisticsVarchar, frozen=True): + type: Literal["text"] = "text" + + +class ColumnStatisticsUnusedFloat(ColumnStatisticsDouble, frozen=True): type: Literal["unused_float"] = "unused_float" -class ColumnStatisticsUnusedString(ColumnStatisticsVarchar): +class ColumnStatisticsUnusedString(ColumnStatisticsVarchar, frozen=True): type: Literal["unused_string"] = "unused_string" @@ -88,38 +84,44 @@ class ColumnStatisticsUnusedString(ColumnStatisticsVarchar): | ColumnStatisticsJoinKey | ColumnStatisticsTimeStamp | ColumnStatisticsTimeStampAsFloat + | ColumnStatisticsText | ColumnStatisticsUnusedFloat | ColumnStatisticsUnusedString, Field(discriminator="type"), ] -class ColumnProfile(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class ColumnInformation(BaseModel, frozen=True): name: str role: Role statistics: ColumnStatistics -class DataFrameInformation(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class DataFrameInformation(BaseModel, frozen=True): name: str path: Path - column_profile: Mapping[str, ColumnProfile] + columns: Mapping[str, ColumnInformation] DataFrameInformationByName = Mapping[str, DataFrameInformation] +class ColumnType(str, Enum): + """Column types supported by GetML-IO based on DuckDBs SUMMARIZE statistics.""" + + DOUBLE = "DOUBLE" + TIMESTAMP_NS = "TIMESTAMP_NS" + 
VARCHAR = "VARCHAR" + + ROLE_TO_COLUMN_STATISTICS_TYPE_MAPPING = { - (Role.CATEGORICAL, "VARCHAR"): ColumnStatisticsCategorical, - (Role.JOIN_KEY, "VARCHAR"): ColumnStatisticsJoinKey, - (Role.NUMERICAL, "DOUBLE"): ColumnStatisticsNumerical, - (Role.TARGET, "DOUBLE"): ColumnStatisticsTarget, - (Role.TIME_STAMP, "TIMESTAMP_NS"): ColumnStatisticsTimeStamp, - (Role.TIME_STAMP, "DOUBLE"): ColumnStatisticsTimeStampAsFloat, - (Role.UNUSED_FLOAT, "DOUBLE"): ColumnStatisticsUnusedFloat, - (Role.UNUSED_STRING, "VARCHAR"): ColumnStatisticsUnusedString, + (Role.CATEGORICAL, ColumnType.VARCHAR): ColumnStatisticsCategorical, + (Role.JOIN_KEY, ColumnType.VARCHAR): ColumnStatisticsJoinKey, + (Role.NUMERICAL, ColumnType.DOUBLE): ColumnStatisticsNumerical, + (Role.TARGET, ColumnType.DOUBLE): ColumnStatisticsTarget, + (Role.TIME_STAMP, ColumnType.TIMESTAMP_NS): ColumnStatisticsTimeStamp, + (Role.TIME_STAMP, ColumnType.DOUBLE): ColumnStatisticsTimeStampAsFloat, + (Role.TEXT, ColumnType.VARCHAR): ColumnStatisticsText, + (Role.UNUSED_FLOAT, ColumnType.DOUBLE): ColumnStatisticsUnusedFloat, + (Role.UNUSED_STRING, ColumnType.VARCHAR): ColumnStatisticsUnusedString, } diff --git a/src/getml_io/metadata/pipeline_information.py b/src/getml_io/metadata/pipeline_information.py index 91b3b83..3030cc2 100644 --- a/src/getml_io/metadata/pipeline_information.py +++ b/src/getml_io/metadata/pipeline_information.py @@ -2,13 +2,12 @@ from collections.abc import Sequence from enum import Enum -from typing import ClassVar from getml.feature_learning.loss_functions import ( CROSSENTROPYLOSS, SQUARELOSS, ) -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel from getml_io.getml.feature_learning import FeatureLearner from getml_io.getml.features import Features @@ -25,9 +24,7 @@ class LossFunction(str, Enum): SQUARE_LOSS = SQUARELOSS -class PipelineInformation(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class PipelineInformation(BaseModel, 
frozen=True): id: str predictions: DataFrameInformationByName feature_sets: DataFrameInformationByName diff --git a/src/getml_io/metadata/placeholder_information.py b/src/getml_io/metadata/placeholder_information.py index 735f4b2..7b9d8d4 100644 --- a/src/getml_io/metadata/placeholder_information.py +++ b/src/getml_io/metadata/placeholder_information.py @@ -1,17 +1,14 @@ from __future__ import annotations from collections.abc import Sequence -from typing import ClassVar -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel from getml_io.getml.relationships import Relationship from getml_io.getml.roles import Roles -class JoinInformation(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class JoinInformation(BaseModel, frozen=True): right: PlaceholderInformation on: Sequence[tuple[str, str]] | Sequence[tuple[None, None]] time_stamps: str | tuple[str, str] | None @@ -22,9 +19,7 @@ class JoinInformation(BaseModel): lagged_targets: bool | None -class PlaceholderInformation(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class PlaceholderInformation(BaseModel, frozen=True): name: str roles: Roles joins: Sequence[JoinInformation] diff --git a/src/getml_io/serialize/dataframe_or_view.py b/src/getml_io/serialize/dataframe_or_view.py index 3b31488..c5e0507 100644 --- a/src/getml_io/serialize/dataframe_or_view.py +++ b/src/getml_io/serialize/dataframe_or_view.py @@ -1,35 +1,31 @@ from __future__ import annotations +import functools import logging -from collections.abc import Mapping from logging import Logger from pathlib import Path -from typing import cast -import duckdb from getml.data import ( DataFrame, View, ) -from getml_io.getml.roles import Role from getml_io.metadata.dataframe_information import ( - ROLE_TO_COLUMN_STATISTICS_TYPE_MAPPING, - ColumnProfile, - ColumnStatistics, DataFrameInformation, ) -from getml_io.serialize.exception import ( - DataFrameParquetStorageError, - 
UnsupportedColumnStatisticsError, -) -from getml_io.serialize.roles import serialize_role +from getml_io.serialize.parquet import serialize_dataframe from getml_io.utils.convert import assume_is_str -from getml_io.utils.exception import StorageDirectoryCreationError logger: Logger = logging.getLogger(__name__) +def _save_dataframe_or_view_as_parquet( + path: Path, + dataframe_or_view: DataFrame | View, +) -> None: + dataframe_or_view.to_parquet(str(path)) + + def serialize_dataframe_or_view( dataframe_or_view: DataFrame | View, target_storage_directory: Path, @@ -50,130 +46,15 @@ def serialize_dataframe_or_view( Returns: DataFrameInformation: The serialized DataFrame or View information. - Raises: - StorageDirectoryCreationError: If the target storage directory - cannot be created. - DataFrameParquetStorageError: If storing the DataFrame or View - as a Parquet file fails. - """ - try: - target_storage_directory.mkdir(parents=True, exist_ok=True) - except Exception as exception: - raise StorageDirectoryCreationError(target_storage_directory) from exception - - name = assume_is_str(dataframe_or_view.name) - filename = ( - f"{filename_prefix}.{name}" - if filename_prefix and filename_prefix != name - else name - ) - parquet_filepath = target_storage_directory / f"{filename}.parquet" - try: - dataframe_or_view.to_parquet(str(parquet_filepath)) - except Exception as exception: - raise DataFrameParquetStorageError( - name, - parquet_filepath, - ) from exception - - column_profile = _calculate_column_profile(parquet_filepath, dataframe_or_view) - - return DataFrameInformation( - name=name, - path=parquet_filepath, - column_profile=column_profile, + return serialize_dataframe( + target_storage_directory=target_storage_directory, + save_parquet=functools.partial( + _save_dataframe_or_view_as_parquet, + dataframe_or_view=dataframe_or_view, + ), + dataframe_name=assume_is_str(dataframe_or_view.name), + get_getml_role_by_column=dataframe_or_view.roles.column, + 
column_names=dataframe_or_view.columns, + filename_prefix=filename_prefix, ) - - -def _calculate_column_profile( - parquet_filepath: Path, - dataframe_or_view: DataFrame | View, -) -> dict[str, ColumnProfile]: - summary_statistics = _calculate_summary_statistics( - parquet_filepath, - dataframe_or_view, - ) - return { - name: ColumnProfile( - name=name, - role=Role(dataframe_or_view.roles.column(name)), - statistics=summary_statistics[name], - ) - for name in dataframe_or_view.columns - } - - -def _calculate_summary_statistics( - parquet_filepath: Path, - dataframe_or_view: DataFrame | View, -) -> dict[str, ColumnStatistics]: - raw_summary_statistics = _fetch_raw_summary_statistics(parquet_filepath) - return _build_column_statistics( - dataframe_or_view, - raw_summary_statistics, - ) - - -SUMMARIZE_STATEMENT_TEMPLATE = "SUMMARIZE (SELECT * FROM read_parquet(?))" - - -def _fetch_raw_summary_statistics( - parquet_filepath: Path, -) -> dict[str, dict[str, str | int | float]]: - with ( - duckdb.connect() as connection, # pyright: ignore [reportUnknownMemberType] - ): - logger.debug( - "Calculating summary statistics for Parquet '%s'", - parquet_filepath, - ) - return cast( - "dict[str, dict[str, str | int | float]]", - cast( - "object", - connection.execute( # pyright: ignore [reportUnknownMemberType] - SUMMARIZE_STATEMENT_TEMPLATE, - [str(parquet_filepath)], - ) - .df() - .set_index("column_name") - .to_dict(orient="index"), - ), - ) - - -def _build_column_statistics( - dataframe_or_view: DataFrame | View, - raw_summary_statistics: Mapping[str, Mapping[str, str | int | float]], -) -> dict[str, ColumnStatistics]: - return { - name: _get_column_statistics_type( - dataframe_or_view, - name, - assume_is_str(raw_summary_statistics[name]["column_type"]), - ).model_validate( - raw_summary_statistics[name], - ) - for name in dataframe_or_view.columns - } - - -def _get_column_statistics_type( - dataframe_or_view: DataFrame | View, - name: str, - column_type: str, -) -> 
type[ColumnStatistics]: - role = serialize_role(dataframe_or_view.roles.column(name)) - column_statistics_type = ROLE_TO_COLUMN_STATISTICS_TYPE_MAPPING.get(( - role, - column_type, - )) - if column_statistics_type is None: - raise UnsupportedColumnStatisticsError( - assume_is_str(dataframe_or_view.name), - name, - role, - column_type, - ) - return column_statistics_type diff --git a/src/getml_io/serialize/parquet.py b/src/getml_io/serialize/parquet.py new file mode 100644 index 0000000..b819bf7 --- /dev/null +++ b/src/getml_io/serialize/parquet.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +import logging +from collections.abc import Callable, Mapping, Sequence +from logging import Logger +from pathlib import Path +from typing import cast + +import duckdb +import pyarrow as pa +import pyarrow.parquet as pq +from getml.data.roles.types import Role as GetMLRole + +from getml_io.getml.roles import Role +from getml_io.metadata.dataframe_information import ( + ROLE_TO_COLUMN_STATISTICS_TYPE_MAPPING, + ColumnInformation, + ColumnStatistics, + ColumnType, + DataFrameInformation, +) +from getml_io.serialize.exception import ( + DataFrameParquetStorageError, + UnsupportedColumnStatisticsError, +) +from getml_io.serialize.roles import serialize_role +from getml_io.utils.convert import assume_is_str +from getml_io.utils.exception import StorageDirectoryCreationError + +logger: Logger = logging.getLogger(__name__) + + +def save_table_as_parquet(path: Path, table: pa.Table) -> None: # pyright: ignore [reportUnknownParameterType, reportUnknownMemberType] + """Save a PyArrow Table as a Parquet file.""" + pq.write_table(table, path) # pyright: ignore [reportUnknownMemberType, reportUnknownArgumentType] + + +def serialize_dataframe( # noqa: PLR0913 + target_storage_directory: Path, + save_parquet: Callable[[Path], None], + dataframe_name: str, + get_getml_role_by_column: Callable[[str], GetMLRole], + column_names: Sequence[str], + *, + filename_prefix: str | None = 
None, +) -> DataFrameInformation: + """Serialize a dataframe into the target storage directory. + + Args: + target_storage_directory: The directory where the serialized dataframe + will be saved. + save_parquet: A callable that saves the dataframe as a Parquet file + to a given path. + dataframe_name: The name of the dataframe. + get_getml_role_by_column: A callable that returns the getML role + for a given column name. + column_names: The names of the columns in the dataframe. + filename_prefix: An optional prefix for the filename. + If provided and different from the DataFrame name, it will be used + as a prefix for the filename, followed by the DataFrame name. + Else, only the DataFrame name will be used as the filename. + + Returns: + DataFrameInformation: The serialized DataFrame information. + + Raises: + StorageDirectoryCreationError: If the target storage directory + cannot be created. + DataFrameParquetStorageError: If storing the DataFrame as a Parquet file fails. + + """ + try: + target_storage_directory.mkdir(parents=True, exist_ok=True) + except Exception as exception: + raise StorageDirectoryCreationError(target_storage_directory) from exception + + filename = ( + f"{filename_prefix}.{dataframe_name}" + if filename_prefix and filename_prefix != dataframe_name + else dataframe_name + ) + parquet_filepath = target_storage_directory / f"{filename}.parquet" + try: + save_parquet(parquet_filepath) + except Exception as exception: + raise DataFrameParquetStorageError( + dataframe_name, + parquet_filepath, + ) from exception + + column_information_by_name = _build_column_information_by_name( + parquet_filepath, + dataframe_name, + get_getml_role_by_column, + column_names, + ) + + return DataFrameInformation( + name=dataframe_name, + path=parquet_filepath, + columns=column_information_by_name, + ) + + +def _build_column_information_by_name( + parquet_filepath: Path, + dataframe_name: str, + get_getml_role_by_column: Callable[[str], GetMLRole], + column_names: 
Sequence[str], +) -> dict[str, ColumnInformation]: + summary_statistics = _calculate_summary_statistics( + parquet_filepath, + dataframe_name, + get_getml_role_by_column, + column_names, + ) + return { + column_name: ColumnInformation( + name=column_name, + role=Role(get_getml_role_by_column(column_name)), + statistics=summary_statistics[column_name], + ) + for column_name in column_names + } + + +def _calculate_summary_statistics( + parquet_filepath: Path, + dataframe_name: str, + get_getml_role_by_column: Callable[[str], GetMLRole], + column_names: Sequence[str], +) -> dict[str, ColumnStatistics]: + raw_summary_statistics = _fetch_raw_summary_statistics(parquet_filepath) + return _build_column_statistics_by_name( + dataframe_name, + get_getml_role_by_column, + column_names, + raw_summary_statistics, + ) + + +SUMMARIZE_STATEMENT_TEMPLATE = "SUMMARIZE (SELECT * FROM read_parquet(?))" + + +def _fetch_raw_summary_statistics( + parquet_filepath: Path, +) -> dict[str, dict[str, str | int | float | None]]: + with ( + duckdb.connect() as connection, # pyright: ignore [reportUnknownMemberType] + ): + logger.debug( + "Calculating summary statistics for Parquet '%s'", + parquet_filepath, + ) + return cast( + "dict[str, dict[str, str | int | float | None]]", + cast( + "object", + connection.execute( # pyright: ignore [reportUnknownMemberType] + SUMMARIZE_STATEMENT_TEMPLATE, + [str(parquet_filepath)], + ) + .df() + .set_index("column_name") + .to_dict(orient="index"), + ), + ) + + +def _build_column_statistics_by_name( + dataframe_name: str, + get_getml_role_by_column: Callable[[str], GetMLRole], + column_names: Sequence[str], + raw_summary_statistics: Mapping[str, Mapping[str, str | int | float | None]], +) -> dict[str, ColumnStatistics]: + return { + column_name: _get_column_statistics_type( + dataframe_name, + column_name, + get_getml_role_by_column(column_name), + assume_is_str(raw_summary_statistics[column_name]["column_type"]), + ).model_validate( + 
raw_summary_statistics[column_name], + ) + for column_name in column_names + } + + +def _get_column_statistics_type( + dataframe_name: str, + column_name: str, + column_role: GetMLRole, + column_type: str, +) -> type[ColumnStatistics]: + role = serialize_role(column_role) + column_statistics_type = ROLE_TO_COLUMN_STATISTICS_TYPE_MAPPING.get( + ( + role, + ColumnType(column_type), + ), + ) + if column_statistics_type is None: + raise UnsupportedColumnStatisticsError( + dataframe_name, + column_name, + role, + column_type, + ) + return column_statistics_type diff --git a/src/getml_io/serialize/pipeline.py b/src/getml_io/serialize/pipeline.py index 87c4d1f..294b308 100644 --- a/src/getml_io/serialize/pipeline.py +++ b/src/getml_io/serialize/pipeline.py @@ -1,7 +1,9 @@ import dataclasses +import functools from pathlib import Path from typing import cast +import getml.data.roles as getml_roles import numpy as np import pyarrow as pa from getml import feature_learning as getml_feature_learner @@ -57,6 +59,10 @@ from getml_io.serialize.dataframe_information import derive_instances_with_relative_path from getml_io.serialize.dataframe_or_view import serialize_dataframe_or_view from getml_io.serialize.exception import WrongPipelineScoreTypeError +from getml_io.serialize.parquet import ( + save_table_as_parquet, # pyright: ignore [reportUnknownVariableType] + serialize_dataframe, +) from getml_io.serialize.pipeline_information import serialize_pipeline_information from getml_io.serialize.placeholder import serialize_placeholder from getml_io.utils.convert import ( @@ -166,18 +172,21 @@ def serialize_predictions( "NDArray[np.float64]", pipeline.predict(container[subset_name]), # pyright: ignore [reportUnknownMemberType] ) + column_names = pipeline.targets prediction_table = pa.Table.from_arrays( # pyright: ignore [reportUnknownMemberType, reportUnknownVariableType] prediction.T, - names=list(map(str, range(prediction.shape[1]))), - ) - prediction_dataframe = 
DataFrame.from_arrow( # pyright: ignore [reportUnknownMemberType] - prediction_table, # pyright: ignore [reportUnknownArgumentType] - f"prediction.{subset_name}", + names=column_names, ) - prediction_results[subset_name] = serialize_dataframe_or_view( - prediction_dataframe, - predict_storage_directory, + prediction_results[subset_name] = serialize_dataframe( + target_storage_directory=predict_storage_directory, + save_parquet=functools.partial( + save_table_as_parquet, + table=prediction_table, + ), + dataframe_name=f"prediction.{subset_name}", + get_getml_role_by_column=(lambda _column_name: getml_roles.target), + column_names=column_names, ) return prediction_results diff --git a/src/getml_io/utils/convert.py b/src/getml_io/utils/convert.py index ade46b4..b1bbdb5 100644 --- a/src/getml_io/utils/convert.py +++ b/src/getml_io/utils/convert.py @@ -12,7 +12,7 @@ def assume_is_str( - value: FloatColumn | StringColumn | str | Subset | float, + value: FloatColumn | StringColumn | str | Subset | float | None, ) -> str: """Assume `value` conforms to the return type for static analysis. 
diff --git a/tests/integration/assertions.py b/tests/integration/assertions.py index 3488862..b295d95 100644 --- a/tests/integration/assertions.py +++ b/tests/integration/assertions.py @@ -5,7 +5,7 @@ from getml_io.getml.scores import Scores from getml_io.metadata.container_information import ContainerInformation from getml_io.metadata.dataframe_information import ( - ColumnProfile, + ColumnInformation, ColumnStatistics, DataFrameInformation, ) @@ -104,28 +104,28 @@ def assert_dataframe_information( assert dataframe_information.path == expected_dataframe_information.path assert ( - dataframe_information.column_profile.keys() - == expected_dataframe_information.column_profile.keys() + dataframe_information.columns.keys() + == expected_dataframe_information.columns.keys() ) for ( column_name, - column_profile, - ) in expected_dataframe_information.column_profile.items(): - assert_column_profile( - dataframe_information.column_profile[column_name], - column_profile, + column_information_by_name, + ) in expected_dataframe_information.columns.items(): + assert_column_information( + dataframe_information.columns[column_name], + column_information_by_name, ) -def assert_column_profile( - column_profile: ColumnProfile, - expected_column_profile: ColumnProfile, +def assert_column_information( + column_information: ColumnInformation, + expected_column_information: ColumnInformation, ) -> None: - assert column_profile.name == expected_column_profile.name - assert column_profile.role == expected_column_profile.role + assert column_information.name == expected_column_information.name + assert column_information.role == expected_column_information.role assert_column_statistics( - column_profile.statistics, - expected_column_profile.statistics, + column_information.statistics, + expected_column_information.statistics, ) diff --git a/tests/integration/data/cora/cora.py b/tests/integration/data/cora/cora.py index f0ab517..f569467 100644 --- a/tests/integration/data/cora/cora.py +++ 
b/tests/integration/data/cora/cora.py @@ -21,7 +21,7 @@ ) -class CoraProject(GetMLProject): +class CoraProject(GetMLProject, frozen=True): pass diff --git a/tests/integration/data/datasets.py b/tests/integration/data/datasets.py index 63d43ee..3d40c35 100644 --- a/tests/integration/data/datasets.py +++ b/tests/integration/data/datasets.py @@ -3,9 +3,8 @@ from collections.abc import Mapping, Sequence from enum import Enum from pathlib import Path -from typing import ClassVar -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel DATA_PATH: Path = Path(__file__).parent @@ -17,9 +16,7 @@ class DataSetName(str, Enum): NUMERICAL = "numerical" -class DataSet(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict(frozen=True) - +class DataSet(BaseModel, frozen=True): name: DataSetName population: Sequence[Path] peripheral: Sequence[Path] diff --git a/tests/integration/data/getmlproject.py b/tests/integration/data/getmlproject.py index 5a7519c..39c74da 100644 --- a/tests/integration/data/getmlproject.py +++ b/tests/integration/data/getmlproject.py @@ -6,13 +6,13 @@ from contextlib import contextmanager from itertools import chain from pathlib import Path -from typing import ClassVar, TypeVar +from typing import TypeVar import getml from filelock import FileLock from getml.data import Container, DataFrame, View from getml.pipeline import Pipeline -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel from tests.integration.data.datasets import DATASETS, DataSetName @@ -109,12 +109,7 @@ def _save_project_bundle(self) -> None: getml.project.save(filename=self._path) # pyright: ignore [reportUnknownMemberType] -class GetMLProject(BaseModel): - model_config: ClassVar[ConfigDict] = ConfigDict( - arbitrary_types_allowed=True, - frozen=True, - ) - +class GetMLProject(BaseModel, frozen=True, arbitrary_types_allowed=True): name: str pipeline: Pipeline container: Container diff --git 
a/tests/integration/data/loans/expected.container.json b/tests/integration/data/loans/expected.container.json index 476d87c..aa57349 100644 --- a/tests/integration/data/loans/expected.container.json +++ b/tests/integration/data/loans/expected.container.json @@ -5,7 +5,7 @@ "meta": { "name": "meta", "path": "container/peripheral/meta.parquet", - "column_profile": { + "columns": { "account_id": { "name": "account_id", "role": "join_key", @@ -421,7 +421,7 @@ "order": { "name": "order", "path": "container/peripheral/order.parquet", - "column_profile": { + "columns": { "account_id": { "name": "account_id", "role": "join_key", @@ -520,7 +520,7 @@ "trans": { "name": "trans", "path": "container/peripheral/trans.parquet", - "column_profile": { + "columns": { "date": { "name": "date", "role": "time_stamp", @@ -683,7 +683,7 @@ "train": { "name": "train", "path": "container/subsets/train.parquet", - "column_profile": { + "columns": { "date_loan": { "name": "date_loan", "role": "time_stamp", @@ -867,7 +867,7 @@ "test": { "name": "test", "path": "container/subsets/test.parquet", - "column_profile": { + "columns": { "date_loan": { "name": "date_loan", "role": "time_stamp", diff --git a/tests/integration/data/loans/expected.pipeline.json b/tests/integration/data/loans/expected.pipeline.json index d9b10f8..2b61ba8 100644 --- a/tests/integration/data/loans/expected.pipeline.json +++ b/tests/integration/data/loans/expected.pipeline.json @@ -1,26 +1,26 @@ { - "id": "wzvjNK", + "id": "MoMdqE", "predictions": { "train": { "name": "prediction.train", "path": "pipeline/predictions/prediction.train.parquet", - "column_profile": { - "0": { - "name": "0", - "role": "unused_float", + "columns": { + "default": { + "name": "default", + "role": "target", "statistics": { "count": 459, - "approx_unique": 452, - "avg": 0.11984065395154968, - "min": 0.0013052262365818024, - "max": 0.9940769672393799, - "q25": 0.007235923552394119, - "q50": 0.017314163701874868, - "q75": 0.06367057869728242, - "std": 
0.25014608992693693, + "approx_unique": 400, "null_percentage": 0.0, + "avg": 0.1199548491776448, + "min": 0.0014046502765268087, + "max": 0.9896621704101562, + "q25": 0.005924892621502901, + "q50": 0.01712311291116841, + "q75": 0.06485380087461735, + "std": 0.251528629165957, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } } } @@ -28,23 +28,23 @@ "test": { "name": "prediction.test", "path": "pipeline/predictions/prediction.test.parquet", - "column_profile": { - "0": { - "name": "0", - "role": "unused_float", + "columns": { + "default": { + "name": "default", + "role": "target", "statistics": { "count": 223, - "approx_unique": 229, - "avg": 0.10654706618594081, - "min": 0.0016005451325327158, - "max": 0.9642216563224792, - "q25": 0.008410481399753027, - "q50": 0.016846238325039547, - "q75": 0.05923604799641504, - "std": 0.2183209639358569, + "approx_unique": 207, "null_percentage": 0.0, + "avg": 0.10620744447035907, + "min": 0.001119182095862925, + "max": 0.9610710740089417, + "q25": 0.0056097044402526495, + "q50": 0.02214129890004794, + "q75": 0.06275331673936711, + "std": 0.22245768912509767, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } } } @@ -54,20 +54,20 @@ "train": { "name": "features.train", "path": "pipeline/feature_sets/features.train.parquet", - "column_profile": { + "columns": { "date_loan": { "name": "date_loan", "role": "time_stamp", "statistics": { "count": 459, "approx_unique": 441, + "null_percentage": 0.0, "avg": "1996-09-03T01:46:40", "min": "1993-07-05T00:00:00", "max": "1998-12-08T00:00:00", "q25": "1995-05-25T08:40:00", "q50": "1997-01-05T09:18:22.040816", "q75": "1997-12-03T08:40:00", - "null_percentage": 0.0, "column_type": "TIMESTAMP_NS", "type": "time_stamp" } @@ -78,9 +78,9 @@ "statistics": { "count": 459, "approx_unique": 526, + "null_percentage": 0.0, "min": "10001", "max": "9928", - "null_percentage": 0.0, "column_type": "VARCHAR", "type": "join_key" } @@ -91,6 +91,7 @@ "statistics": { 
"count": 459, "approx_unique": 2, + "null_percentage": 0.0, "avg": 0.11982570806100218, "min": 0.0, "max": 1.0, @@ -98,7 +99,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.3251119594076471, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -108,15 +108,15 @@ "role": "numerical", "statistics": { "count": 459, - "approx_unique": 40, - "avg": 769.5424836601308, + "approx_unique": 10, + "null_percentage": 0.0, + "avg": 0.11982570806100218, "min": 0.0, - "max": 19621.0, + "max": 11.0, "q25": 0.0, "q50": 0.0, - "q75": 202.7777777777778, - "std": 2515.4971605816318, - "null_percentage": 0.0, + "q75": 0.0, + "std": 0.8846206426182848, "column_type": "DOUBLE", "type": "numerical" } @@ -126,15 +126,15 @@ "role": "numerical", "statistics": { "count": 459, - "approx_unique": 17, - "avg": 882447.0588235294, + "approx_unique": 19, + "null_percentage": 0.0, + "avg": 390821.89090690034, "min": 0.0, - "max": 68601600.0, + "max": 34905600.0, "q25": 0.0, "q50": 0.0, "q75": 0.0, - "std": 6212366.872861004, - "null_percentage": 0.0, + "std": 2728528.065250332, "column_type": "DOUBLE", "type": "numerical" } @@ -144,15 +144,15 @@ "role": "numerical", "statistics": { "count": 459, - "approx_unique": 46, - "avg": 15.858387799564266, - "min": 0.0, - "max": 100.0, - "q25": 0.0, - "q50": 0.0, - "q75": 0.0, - "std": 30.971326900401827, + "approx_unique": 122, "null_percentage": 0.0, + "avg": 68.10718954248371, + "min": 0.0, + "max": 200.0, + "q25": 46.709027777777784, + "q50": 61.10612244897959, + "q75": 100.0, + "std": 45.586815929888814, "column_type": "DOUBLE", "type": "numerical" } @@ -163,6 +163,7 @@ "statistics": { "count": 459, "approx_unique": 20, + "null_percentage": 0.0, "avg": 1.2352941176470589, "min": 0.0, "max": 24.0, @@ -170,7 +171,6 @@ "q50": 0.0, "q75": 1.0, "std": 2.817052035928917, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -181,6 +181,7 @@ "statistics": { "count": 459, "approx_unique": 34, + "null_percentage": 0.0, "avg": 
7.1725490196078425, "min": 0.0, "max": 100.0, @@ -188,7 +189,6 @@ "q50": 0.0, "q75": 0.0, "std": 22.558438138781643, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -198,15 +198,15 @@ "role": "numerical", "statistics": { "count": 459, - "approx_unique": 53, - "avg": 111.88671023965142, - "min": 0.0, - "max": 167.0, - "q25": 101.6388888888889, - "q50": 113.53061224489795, - "q75": 131.20138888888889, - "std": 39.75240260604348, + "approx_unique": 101, "null_percentage": 0.0, + "avg": 130.9847494553377, + "min": 0.0, + "max": 334.0, + "q25": 103.53472222222223, + "q50": 119.48979591836734, + "q75": 164.2847222222222, + "std": 69.63555931149587, "column_type": "DOUBLE", "type": "numerical" } @@ -217,14 +217,14 @@ "statistics": { "count": 459, "approx_unique": 75, - "avg": 9055.906318082789, + "null_percentage": 0.0, + "avg": 9017.152505446624, "min": 0.0, "max": 12541.0, - "q25": 8516.840277777777, - "q50": 8967.08163265306, + "q25": 8514.173611111111, + "q50": 8966.469387755102, "q75": 9895.083333333334, - "std": 2308.773928846936, - "null_percentage": 0.0, + "std": 2384.4718489689444, "column_type": "DOUBLE", "type": "numerical" } @@ -234,15 +234,15 @@ "role": "numerical", "statistics": { "count": 459, - "approx_unique": 20, - "avg": 2.7734204793028323, + "approx_unique": 69, + "null_percentage": 0.0, + "avg": 26.28322440087146, "min": 0.0, - "max": 95.0, + "max": 196.0, "q25": 0.0, - "q50": 0.0, - "q75": 0.0, - "std": 13.070720949321736, - "null_percentage": 0.0, + "q50": 5.061224489795919, + "q75": 49.201388888888886, + "std": 33.55674269015918, "column_type": "DOUBLE", "type": "numerical" } @@ -252,15 +252,15 @@ "role": "numerical", "statistics": { "count": 459, - "approx_unique": 7, - "avg": 0.2324400871459697, + "approx_unique": 5, + "null_percentage": 0.0, + "avg": 0.12196078431372549, "min": 0.0, "max": 7.01, "q25": 0.0, "q50": 0.0, "q75": 0.0, - "std": 0.898713135182863, - "null_percentage": 0.0, + "std": 0.833398675407252, 
"column_type": "DOUBLE", "type": "numerical" } @@ -271,6 +271,7 @@ "statistics": { "count": 459, "approx_unique": 23, + "null_percentage": 0.0, "avg": 0.1993464052287582, "min": 0.0, "max": 6.6, @@ -278,7 +279,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.7869983380593814, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -289,6 +289,7 @@ "statistics": { "count": 459, "approx_unique": 5, + "null_percentage": 0.0, "avg": 37.12418300653595, "min": 12.0, "max": 60.0, @@ -296,7 +297,6 @@ "q50": 36.0, "q75": 48.0, "std": 16.81208246952378, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -307,6 +307,7 @@ "statistics": { "count": 459, "approx_unique": 585, + "null_percentage": 0.0, "avg": 4185.23311546841, "min": 312.0, "max": 9910.0, @@ -314,7 +315,6 @@ "q50": 3976.8571428571427, "q75": 5883.611111111112, "std": 2268.721641494734, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -325,6 +325,7 @@ "statistics": { "count": 459, "approx_unique": 535, + "null_percentage": 0.0, "avg": 154961.38562091504, "min": 4980.0, "max": 590820.0, @@ -332,7 +333,6 @@ "q50": 121004.81632653062, "q75": 216506.33333333334, "std": 116425.1930214991, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -342,20 +342,20 @@ "test": { "name": "features.test", "path": "pipeline/feature_sets/features.test.parquet", - "column_profile": { + "columns": { "date_loan": { "name": "date_loan", "role": "time_stamp", "statistics": { "count": 223, "approx_unique": 182, + "null_percentage": 0.0, "avg": "1996-11-22T01:49:46.547085", "min": "1993-11-10T00:00:00", "max": "1998-12-06T00:00:00", "q25": "1995-11-23T07:20:00", "q50": "1997-04-28T16:00:00", "q75": "1998-01-15T20:00:00", - "null_percentage": 0.0, "column_type": "TIMESTAMP_NS", "type": "time_stamp" } @@ -366,9 +366,9 @@ "statistics": { "count": 223, "approx_unique": 203, + "null_percentage": 0.0, "min": "10005", "max": "993", - "null_percentage": 0.0, 
"column_type": "VARCHAR", "type": "join_key" } @@ -379,6 +379,7 @@ "statistics": { "count": 223, "approx_unique": 2, + "null_percentage": 0.0, "avg": 0.09417040358744394, "min": 0.0, "max": 1.0, @@ -386,7 +387,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.29272270742877293, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -396,15 +396,15 @@ "role": "numerical", "statistics": { "count": 223, - "approx_unique": 30, - "avg": 842.9372197309417, + "approx_unique": 9, + "null_percentage": 0.0, + "avg": 0.242152466367713, "min": 0.0, - "max": 18200.0, + "max": 19.0, "q25": 0.0, "q50": 0.0, - "q75": 144.44444444444446, - "std": 2885.3221572288658, - "null_percentage": 0.0, + "q75": 0.0, + "std": 1.7175875691592488, "column_type": "DOUBLE", "type": "numerical" } @@ -414,15 +414,15 @@ "role": "numerical", "statistics": { "count": 223, - "approx_unique": 11, - "avg": 1550550.67264574, + "approx_unique": 12, + "null_percentage": 0.0, + "avg": 617663.1390134529, "min": 0.0, - "max": 128390400.0, + "max": 31536000.0, "q25": 0.0, "q50": 0.0, "q75": 0.0, - "std": 10521062.635618178, - "null_percentage": 0.0, + "std": 3513401.154308259, "column_type": "DOUBLE", "type": "numerical" } @@ -432,15 +432,15 @@ "role": "numerical", "statistics": { "count": 223, - "approx_unique": 27, - "avg": 14.7, - "min": 0.0, - "max": 100.0, - "q25": 0.0, - "q50": 0.0, - "q75": 0.0, - "std": 29.76500454385141, + "approx_unique": 96, "null_percentage": 0.0, + "avg": 71.24215246636768, + "min": 0.0, + "max": 200.0, + "q25": 51.166666666666664, + "q50": 66.43333333333334, + "q75": 100.0, + "std": 44.45756878612511, "column_type": "DOUBLE", "type": "numerical" } @@ -451,6 +451,7 @@ "statistics": { "count": 223, "approx_unique": 13, + "null_percentage": 0.0, "avg": 1.201793721973094, "min": 0.0, "max": 14.0, @@ -458,7 +459,6 @@ "q50": 0.0, "q75": 1.0, "std": 2.3209962936410613, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -469,6 +469,7 @@ "statistics": { 
"count": 223, "approx_unique": 13, + "null_percentage": 0.0, "avg": 3.6977578475336323, "min": 0.0, "max": 100.0, @@ -476,7 +477,6 @@ "q50": 0.0, "q75": 0.0, "std": 15.026950095889092, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -486,15 +486,15 @@ "role": "numerical", "statistics": { "count": 223, - "approx_unique": 49, - "avg": 113.81165919282512, - "min": 0.0, - "max": 167.0, - "q25": 100.0, - "q50": 111.0, - "q75": 127.16666666666667, - "std": 31.72446761464842, + "approx_unique": 80, "null_percentage": 0.0, + "avg": 137.85201793721973, + "min": 0.0, + "max": 334.0, + "q25": 104.44444444444444, + "q50": 118.66666666666667, + "q75": 167.0, + "std": 62.84121691379701, "column_type": "DOUBLE", "type": "numerical" } @@ -505,6 +505,7 @@ "statistics": { "count": 223, "approx_unique": 63, + "null_percentage": 0.0, "avg": 9169.70403587444, "min": 0.0, "max": 12541.0, @@ -512,7 +513,6 @@ "q50": 8905.666666666666, "q75": 9925.222222222223, "std": 1980.4491806294118, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -522,15 +522,15 @@ "role": "numerical", "statistics": { "count": 223, - "approx_unique": 9, - "avg": 2.9596412556053813, + "approx_unique": 51, + "null_percentage": 0.0, + "avg": 22.816143497757846, "min": 0.0, - "max": 75.0, + "max": 142.0, "q25": 0.0, "q50": 0.0, - "q75": 0.0, - "std": 12.981732968356484, - "null_percentage": 0.0, + "q75": 41.583333333333336, + "std": 30.159595150714328, "column_type": "DOUBLE", "type": "numerical" } @@ -540,15 +540,15 @@ "role": "numerical", "statistics": { "count": 223, - "approx_unique": 6, - "avg": 0.2531390134529148, + "approx_unique": 4, + "null_percentage": 0.0, + "avg": 0.17748878923766814, "min": 0.0, "max": 7.01, "q25": 0.0, "q50": 0.0, "q75": 0.0, - "std": 1.0314587821754926, - "null_percentage": 0.0, + "std": 1.0049490008170177, "column_type": "DOUBLE", "type": "numerical" } @@ -559,6 +559,7 @@ "statistics": { "count": 223, "approx_unique": 17, + 
"null_percentage": 0.0, "avg": 0.20986547085201795, "min": 0.0, "max": 6.6, @@ -566,7 +567,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.8666449130086721, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -577,6 +577,7 @@ "statistics": { "count": 223, "approx_unique": 5, + "null_percentage": 0.0, "avg": 35.19282511210762, "min": 12.0, "max": 60.0, @@ -584,7 +585,6 @@ "q50": 36.0, "q75": 48.0, "std": 17.571310118488245, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -595,6 +595,7 @@ "statistics": { "count": 223, "approx_unique": 214, + "null_percentage": 0.0, "avg": 4201.8430493273545, "min": 304.0, "max": 9721.0, @@ -602,7 +603,6 @@ "q50": 3888.6666666666665, "q75": 5700.5, "std": 2107.7211804113517, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -613,6 +613,7 @@ "statistics": { "count": 223, "approx_unique": 223, + "null_percentage": 0.0, "avg": 144100.73542600896, "min": 10944.0, "max": 566640.0, @@ -620,7 +621,6 @@ "q50": 108992.0, "q75": 198987.0, "std": 106696.63113384871, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -631,11 +631,11 @@ "feature_learners": [ { "aggregation": [ - "COUNT", "AVG", + "COUNT", "SUM", - "MAX", - "MIN" + "MIN", + "MAX" ], "allow_sets": true, "delta_t": 0.0, @@ -649,19 +649,19 @@ "num_threads": 0, "propositionalization": { "aggregation": [ - "MEDIAN", + "COUNT DISTINCT", "STDDEV", - "COUNT", "AVG", - "TREND", - "COUNT DISTINCT", - "SUM", "FIRST", - "MODE", - "LAST", + "MEDIAN", + "COUNT", "COUNT MINUS COUNT DISTINCT", - "MAX", - "MIN" + "LAST", + "MIN", + "MODE", + "TREND", + "SUM", + "MAX" ], "delta_t": 0.0, "loss_function": "CrossEntropyLoss", @@ -962,87 +962,87 @@ "name": "feature_1_1", "index": 0, "target": "default", - "importance": 0.1135676632264945, - "correlation": 0.490601623544188, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_1\";\n\nCREATE TABLE \"FEATURE_1_1\" AS\nSELECT MAX( t2.\"amount\" ) AS \"feature_1_1\",\n 
t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"TRANS__STAGING_TABLE_4\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE ( t2.\"date\" <= t1.\"date_loan\"\n) AND (\n ( ( t2.\"balance\" <= 3480.000000 ) AND ( t1.\"date_loan\" - t2.\"date\" <= 19958400.000000 ) AND ( t2.\"amount\" <= 20299.000000 ) AND ( t2.\"k_symbol\" NOT IN ( 'POJISTNE', 'SLUZBY', 'UROK', 'SANKC. UROK' ) OR t2.\"k_symbol\" IS NULL ) )\n)\nGROUP BY t1.rowid;" + "importance": 0.0, + "correlation": 0.33072895803942076, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_1\";\n\nCREATE TABLE \"FEATURE_1_1\" AS\nSELECT MIN( t2.\"amount\" ) AS \"feature_1_1\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"TRANS__STAGING_TABLE_4\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE ( t2.\"date\" <= t1.\"date_loan\"\n) AND (\n ( ( t2.\"k_symbol\" IN ( 'SANKC. UROK' ) ) )\n)\nGROUP BY t1.rowid;" }, "feature_1_2": { "name": "feature_1_2", "index": 1, "target": "default", - "importance": 0.09923157995248899, - "correlation": 0.44092213849038303, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_2\";\n\nCREATE TABLE \"FEATURE_1_2\" AS\nSELECT SUM( t1.\"date_loan\" - t2.\"date\" ) AS \"feature_1_2\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"TRANS__STAGING_TABLE_4\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE ( t2.\"date\" <= t1.\"date_loan\"\n) AND (\n ( ( t2.\"k_symbol\" NOT IN ( 'SANKC. UROK' ) OR t2.\"k_symbol\" IS NULL ) AND ( t2.\"balance\" > 186780.000000 ) )\nOR ( ( t2.\"k_symbol\" IN ( 'SANKC. UROK' ) ) AND ( t1.\"frequency\" NOT IN ( 'POPLATEK MESICNE' ) OR t1.\"frequency\" IS NULL ) AND ( t2.\"amount\" > 4.000000 ) )\nOR ( ( t2.\"k_symbol\" IN ( 'SANKC. 
UROK' ) ) AND ( t1.\"frequency\" IN ( 'POPLATEK MESICNE' ) ) )\n)\nGROUP BY t1.rowid;" + "importance": 0.20518495220716465, + "correlation": 0.5464694980730578, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_2\";\n\nCREATE TABLE \"FEATURE_1_2\" AS\nSELECT AVG( t1.\"date_loan\" - t2.\"date\" ) AS \"feature_1_2\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"TRANS__STAGING_TABLE_4\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE ( t2.\"date\" <= t1.\"date_loan\"\n) AND (\n ( ( t2.\"balance\" <= -26.000000 ) )\n)\nGROUP BY t1.rowid;" }, "feature_1_3": { "name": "feature_1_3", "index": 2, "target": "default", - "importance": 0.03450241472632771, - "correlation": -0.0021713705808225877, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_3\";\n\nCREATE TABLE \"FEATURE_1_3\" AS\nSELECT AVG( t2.\"a10\" ) AS \"feature_1_3\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"amount\" > 234442.000000 ) AND ( t2.\"type_card\" NOT IN ( 'gold', 'classic', 'junior' ) OR t2.\"type_card\" IS NULL ) AND ( t1.\"frequency\" IN ( 'POPLATEK MESICNE' ) ) AND ( t2.\"a9\" <= 9.000000 ) )\nOR ( ( t1.\"amount\" <= 234442.000000 OR t1.\"amount\" IS NULL ) AND ( t1.\"payments\" > 6912.000000 ) AND ( t2.\"a3\" NOT IN ( 'east Bohemia', 'south Moravia', 'north Bohemia', 'north Moravia' ) OR t2.\"a3\" IS NULL ) )\nOR ( ( t1.\"amount\" <= 234442.000000 OR t1.\"amount\" IS NULL ) AND ( t1.\"payments\" <= 6912.000000 OR t1.\"payments\" IS NULL ) AND ( t2.\"a12\" > 4.637500 ) AND ( t1.\"amount\" > 83764.000000 ) )\n)\nGROUP BY t1.rowid;" + "importance": 0.07021464698347542, + "correlation": -0.16973924231346754, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_3\";\n\nCREATE TABLE \"FEATURE_1_3\" AS\nSELECT SUM( t2.\"a10\" ) AS \"feature_1_3\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = 
t2.\"account_id\"\nWHERE (\n ( ( t1.\"amount\" > 266840.000000 OR t1.\"amount\" IS NULL ) AND ( t2.\"type_card\" NOT IN ( 'gold', 'classic', 'junior' ) OR t2.\"type_card\" IS NULL ) AND ( t2.\"type_disp\" NOT IN ( 'OWNER' ) OR t2.\"type_disp\" IS NULL ) )\nOR ( ( t1.\"amount\" > 266840.000000 OR t1.\"amount\" IS NULL ) AND ( t2.\"type_card\" NOT IN ( 'gold', 'classic', 'junior' ) OR t2.\"type_card\" IS NULL ) AND ( t2.\"type_disp\" IN ( 'OWNER' ) ) AND ( t1.\"frequency\" NOT IN ( 'POPLATEK MESICNE' ) OR t1.\"frequency\" IS NULL ) )\nOR ( ( t1.\"amount\" > 266840.000000 OR t1.\"amount\" IS NULL ) AND ( t2.\"type_card\" IN ( 'gold', 'classic', 'junior' ) ) )\nOR ( ( t1.\"amount\" <= 266840.000000 ) AND ( t2.\"a14\" > 101.000000 ) )\nOR ( ( t1.\"amount\" <= 266840.000000 ) AND ( t2.\"a14\" <= 101.000000 OR t2.\"a14\" IS NULL ) AND ( t1.\"duration\" <= 28.000000 ) )\n)\nGROUP BY t1.rowid;" }, "feature_1_4": { "name": "feature_1_4", "index": 3, "target": "default", - "importance": 0.25317056825181045, + "importance": 0.2207012356972649, "correlation": 0.6216480344796319, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_4\";\n\nCREATE TABLE \"FEATURE_1_4\" AS\nSELECT COUNT( * ) AS \"feature_1_4\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"TRANS__STAGING_TABLE_4\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE ( t2.\"date\" <= t1.\"date_loan\"\n) AND (\n ( ( t2.\"balance\" > 13055.000000 OR t2.\"balance\" IS NULL ) AND ( t2.\"balance\" > 133677.000000 ) AND ( t1.\"frequency\" NOT IN ( 'POPLATEK MESICNE', 'POPLATEK PO OBRATU' ) OR t1.\"frequency\" IS NULL ) )\nOR ( ( t2.\"balance\" <= 13055.000000 ) AND ( t2.\"operation\" IN ( 'VYBER', 'VKLAD' ) ) AND ( t1.\"date_loan\" - t2.\"date\" <= 18423138.461538 ) AND ( t2.\"balance\" > -4311.000000 ) )\n)\nGROUP BY t1.rowid;" + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_4\";\n\nCREATE TABLE \"FEATURE_1_4\" AS\nSELECT COUNT( * ) AS \"feature_1_4\",\n t1.rowid AS rownum\nFROM 
\"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"TRANS__STAGING_TABLE_4\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE ( t2.\"date\" <= t1.\"date_loan\"\n) AND (\n ( ( t2.\"balance\" > 13055.000000 OR t2.\"balance\" IS NULL ) AND ( t2.\"balance\" > 133677.000000 ) AND ( t1.\"frequency\" IN ( 'POPLATEK TYDNE' ) ) )\nOR ( ( t2.\"balance\" <= 13055.000000 ) AND ( t2.\"operation\" IN ( 'VYBER', 'VKLAD' ) ) AND ( t1.\"date_loan\" - t2.\"date\" <= 18423138.461538 ) AND ( t2.\"balance\" > -4311.000000 ) )\n)\nGROUP BY t1.rowid;" }, "feature_1_5": { "name": "feature_1_5", "index": 4, "target": "default", - "importance": 0.022781446408607083, + "importance": 0.033639677000706886, "correlation": 0.23609098488653527, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_5\";\n\nCREATE TABLE \"FEATURE_1_5\" AS\nSELECT MAX( t2.\"a10\" ) AS \"feature_1_5\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"payments\" > 6880.000000 ) AND ( t2.\"type_card\" NOT IN ( 'classic' ) OR t2.\"type_card\" IS NULL ) AND ( t1.\"payments\" > 8407.000000 OR t1.\"payments\" IS NULL ) AND ( t2.\"a3\" IN ( 'south Bohemia', 'north Moravia' ) ) )\nOR ( ( t1.\"payments\" > 6880.000000 ) AND ( t2.\"type_card\" NOT IN ( 'classic' ) OR t2.\"type_card\" IS NULL ) AND ( t1.\"payments\" <= 8407.000000 ) AND ( t2.\"a8\" <= 2.000000 ) )\n)\nGROUP BY t1.rowid;" + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_5\";\n\nCREATE TABLE \"FEATURE_1_5\" AS\nSELECT MIN( t2.\"a10\" ) AS \"feature_1_5\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"payments\" > 6880.000000 ) AND ( t2.\"type_card\" NOT IN ( 'classic' ) OR t2.\"type_card\" IS NULL ) AND ( t1.\"payments\" > 8407.000000 OR t1.\"payments\" IS NULL ) AND ( t2.\"a3\" IN ( 'south Bohemia', 'north Moravia' ) ) )\nOR ( ( t1.\"payments\" > 
6880.000000 ) AND ( t2.\"type_card\" NOT IN ( 'classic' ) OR t2.\"type_card\" IS NULL ) AND ( t1.\"payments\" <= 8407.000000 ) AND ( t2.\"a8\" <= 2.000000 ) )\n)\nGROUP BY t1.rowid;" }, "feature_1_6": { "name": "feature_1_6", "index": 5, "target": "default", - "importance": 0.1746793231004474, - "correlation": -0.18288971144527097, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_6\";\n\nCREATE TABLE \"FEATURE_1_6\" AS\nSELECT AVG( t2.\"a14\" ) AS \"feature_1_6\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"payments\" > 6874.000000 OR t1.\"payments\" IS NULL ) AND ( t2.\"type_card\" NOT IN ( 'gold', 'classic', 'junior' ) OR t2.\"type_card\" IS NULL ) AND ( t2.\"type_disp\" NOT IN ( 'OWNER' ) OR t2.\"type_disp\" IS NULL ) )\nOR ( ( t1.\"payments\" > 6874.000000 OR t1.\"payments\" IS NULL ) AND ( t2.\"type_card\" NOT IN ( 'gold', 'classic', 'junior' ) OR t2.\"type_card\" IS NULL ) AND ( t2.\"type_disp\" IN ( 'OWNER' ) ) AND ( t1.\"duration\" <= 20.000000 ) )\nOR ( ( t1.\"payments\" > 6874.000000 OR t1.\"payments\" IS NULL ) AND ( t2.\"type_card\" IN ( 'gold', 'classic', 'junior' ) ) )\nOR ( ( t1.\"payments\" <= 6874.000000 ) )\n)\nGROUP BY t1.rowid;" + "importance": 0.13030999198640988, + "correlation": -0.2404417686519712, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_6\";\n\nCREATE TABLE \"FEATURE_1_6\" AS\nSELECT SUM( t2.\"a14\" ) AS \"feature_1_6\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"payments\" > 6874.000000 OR t1.\"payments\" IS NULL ) AND ( t2.\"type_card\" NOT IN ( 'gold', 'classic', 'junior' ) OR t2.\"type_card\" IS NULL ) AND ( t2.\"type_disp\" IN ( 'DISPONENT' ) ) )\nOR ( ( t1.\"payments\" > 6874.000000 OR t1.\"payments\" IS NULL ) AND ( t2.\"type_card\" IN ( 'gold', 'classic', 'junior' ) ) )\nOR ( ( t1.\"payments\" <= 
6874.000000 ) )\n)\nGROUP BY t1.rowid;" }, "feature_1_7": { "name": "feature_1_7", "index": 6, "target": "default", - "importance": 0.12502651911553847, + "importance": 0.12470750125347366, "correlation": -0.051941482060654416, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_7\";\n\nCREATE TABLE \"FEATURE_1_7\" AS\nSELECT MIN( t2.\"a11\" ) AS \"feature_1_7\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"payments\" > 7395.000000 OR t1.\"payments\" IS NULL ) AND ( t1.\"amount\" > 172870.000000 OR t1.\"amount\" IS NULL ) AND ( t2.\"a3\" NOT IN ( 'east Bohemia', 'south Moravia', 'north Bohemia', 'west Bohemia' ) OR t2.\"a3\" IS NULL ) AND ( t2.\"type_card\" IN ( 'classic' ) ) )\nOR ( ( t1.\"payments\" > 7395.000000 OR t1.\"payments\" IS NULL ) AND ( t1.\"amount\" > 172870.000000 OR t1.\"amount\" IS NULL ) AND ( t2.\"a3\" IN ( 'east Bohemia', 'south Moravia', 'north Bohemia', 'west Bohemia' ) ) AND ( t2.\"a12\" <= 3.850000 ) )\nOR ( ( t1.\"payments\" > 7395.000000 OR t1.\"payments\" IS NULL ) AND ( t1.\"amount\" <= 172870.000000 ) AND ( t2.\"a3\" NOT IN ( 'west Bohemia' ) OR t2.\"a3\" IS NULL ) )\nOR ( ( t1.\"payments\" <= 7395.000000 ) AND ( t2.\"a13\" > 8.901667 OR t2.\"a13\" IS NULL ) AND ( t2.\"gender\" NOT IN ( 'M' ) OR t2.\"gender\" IS NULL ) )\nOR ( ( t1.\"payments\" <= 7395.000000 ) AND ( t2.\"a13\" <= 8.901667 ) )\n)\nGROUP BY t1.rowid;" + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_7\";\n\nCREATE TABLE \"FEATURE_1_7\" AS\nSELECT MAX( t2.\"a11\" ) AS \"feature_1_7\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"payments\" > 7395.000000 OR t1.\"payments\" IS NULL ) AND ( t1.\"duration\" > 20.000000 OR t1.\"duration\" IS NULL ) AND ( t2.\"a3\" NOT IN ( 'east Bohemia', 'south Moravia', 'north Bohemia', 'west Bohemia' ) OR t2.\"a3\" IS 
NULL ) AND ( t2.\"type_card\" IN ( 'classic' ) ) )\nOR ( ( t1.\"payments\" > 7395.000000 OR t1.\"payments\" IS NULL ) AND ( t1.\"duration\" > 20.000000 OR t1.\"duration\" IS NULL ) AND ( t2.\"a3\" IN ( 'east Bohemia', 'south Moravia', 'north Bohemia', 'west Bohemia' ) ) AND ( t2.\"a12\" <= 3.850000 ) )\nOR ( ( t1.\"payments\" > 7395.000000 OR t1.\"payments\" IS NULL ) AND ( t1.\"duration\" <= 20.000000 ) AND ( t2.\"a3\" IN ( 'south Bohemia', 'Prague', 'central Bohemia', 'south Moravia', 'north Moravia' ) ) )\nOR ( ( t1.\"payments\" <= 7395.000000 ) AND ( t2.\"a13\" > 8.901667 OR t2.\"a13\" IS NULL ) AND ( t2.\"gender\" IN ( 'F' ) ) )\nOR ( ( t1.\"payments\" <= 7395.000000 ) AND ( t2.\"a13\" <= 8.901667 ) )\n)\nGROUP BY t1.rowid;" }, "feature_1_8": { "name": "feature_1_8", "index": 7, "target": "default", - "importance": 0.009308557459210479, - "correlation": -0.03337139710997413, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_8\";\n\nCREATE TABLE \"FEATURE_1_8\" AS\nSELECT AVG( t2.\"a5\" ) AS \"feature_1_8\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"payments\" > 7042.000000 ) AND ( t2.\"a9\" > 4.000000 OR t2.\"a9\" IS NULL ) AND ( t2.\"a3\" NOT IN ( 'north Moravia', 'west Bohemia' ) OR t2.\"a3\" IS NULL ) AND ( t2.\"a11\" > 9765.000000 ) )\nOR ( ( t1.\"payments\" > 7042.000000 ) AND ( t2.\"a9\" > 4.000000 OR t2.\"a9\" IS NULL ) AND ( t2.\"a3\" IN ( 'north Moravia', 'west Bohemia' ) ) )\nOR ( ( t1.\"payments\" > 7042.000000 ) AND ( t2.\"a9\" <= 4.000000 ) )\nOR ( ( t1.\"payments\" <= 7042.000000 OR t1.\"payments\" IS NULL ) AND ( t2.\"a14\" <= 95.000000 ) AND ( t2.\"a6\" > 25.000000 ) )\n)\nGROUP BY t1.rowid;" + "importance": 0.07269841175894148, + "correlation": -0.023541460834913774, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_8\";\n\nCREATE TABLE \"FEATURE_1_8\" AS\nSELECT SUM( t2.\"a5\" ) AS \"feature_1_8\",\n t1.rowid AS rownum\nFROM 
\"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t2.\"gender\" NOT IN ( 'F' ) OR t2.\"gender\" IS NULL ) AND ( t2.\"a14\" > 103.000000 ) )\nOR ( ( t2.\"gender\" NOT IN ( 'F' ) OR t2.\"gender\" IS NULL ) AND ( t2.\"a14\" <= 103.000000 OR t2.\"a14\" IS NULL ) AND ( t2.\"a10\" > 56.916667 ) )\nOR ( ( t2.\"gender\" IN ( 'F' ) ) AND ( t2.\"type_card\" IN ( 'gold', 'classic' ) ) )\n)\nGROUP BY t1.rowid;" }, "feature_1_9": { "name": "feature_1_9", "index": 8, "target": "default", - "importance": 0.007941830306946887, - "correlation": -0.0232127074123776, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_9\";\n\nCREATE TABLE \"FEATURE_1_9\" AS\nSELECT AVG( t2.\"a13\" ) AS \"feature_1_9\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t2.\"a11\" > 8329.000000 OR t2.\"a11\" IS NULL ) AND ( t2.\"a4\" > 367714.000000 ) AND ( t2.\"type_card\" NOT IN ( 'classic' ) OR t2.\"type_card\" IS NULL ) )\nOR ( ( t2.\"a11\" <= 8329.000000 ) AND ( t1.\"amount\" > 235098.000000 ) AND ( t2.\"a3\" NOT IN ( 'south Moravia' ) OR t2.\"a3\" IS NULL ) )\nOR ( ( t2.\"a11\" <= 8329.000000 ) AND ( t1.\"amount\" <= 235098.000000 OR t1.\"amount\" IS NULL ) AND ( t2.\"a8\" > 2.000000 ) AND ( t1.\"duration\" <= 30.000000 ) )\n)\nGROUP BY t1.rowid;" + "importance": 0.0, + "correlation": 0.000501261773749704, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_9\";\n\nCREATE TABLE \"FEATURE_1_9\" AS\nSELECT SUM( t2.\"a13\" ) AS \"feature_1_9\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t2.\"a11\" <= 8329.000000 ) AND ( t1.\"amount\" > 235098.000000 ) AND ( t2.\"a3\" NOT IN ( 'south Moravia' ) OR t2.\"a3\" IS NULL ) )\nOR ( ( t2.\"a11\" <= 8329.000000 ) AND ( t1.\"amount\" <= 235098.000000 OR t1.\"amount\" IS 
NULL ) AND ( t2.\"a7\" <= 3.000000 ) AND ( t1.\"duration\" <= 30.000000 ) )\n)\nGROUP BY t1.rowid;" }, "feature_1_10": { "name": "feature_1_10", "index": 9, "target": "default", - "importance": 0.0010942671416343636, + "importance": 0.006594382647831843, "correlation": 0.15435132513852295, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_10\";\n\nCREATE TABLE \"FEATURE_1_10\" AS\nSELECT MIN( t2.\"a12\" ) AS \"feature_1_10\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"payments\" > 7240.000000 ) AND ( t2.\"type_card\" NOT IN ( 'gold', 'classic' ) OR t2.\"type_card\" IS NULL ) AND ( t1.\"frequency\" IN ( 'POPLATEK MESICNE', 'POPLATEK TYDNE' ) ) AND ( t2.\"type_disp\" NOT IN ( 'DISPONENT' ) OR t2.\"type_disp\" IS NULL ) )\n)\nGROUP BY t1.rowid;" + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_10\";\n\nCREATE TABLE \"FEATURE_1_10\" AS\nSELECT MAX( t2.\"a12\" ) AS \"feature_1_10\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"META__STAGING_TABLE_2\" t2\nON t1.\"account_id\" = t2.\"account_id\"\nWHERE (\n ( ( t1.\"payments\" > 7240.000000 ) AND ( t2.\"type_card\" NOT IN ( 'gold', 'classic' ) OR t2.\"type_card\" IS NULL ) AND ( t1.\"frequency\" NOT IN ( 'POPLATEK PO OBRATU' ) OR t1.\"frequency\" IS NULL ) AND ( t2.\"type_disp\" NOT IN ( 'DISPONENT' ) OR t2.\"type_disp\" IS NULL ) )\n)\nGROUP BY t1.rowid;" }, "duration": { "name": "duration", "index": 10, "target": "default", - "importance": 0.0005039782195063603, + "importance": 0.0017088759676955176, "correlation": -0.0271918442877901, "sql": "" }, @@ -1050,7 +1050,7 @@ "name": "payments", "index": 11, "target": "default", - "importance": 0.0788924833005465, + "importance": 0.056348801452619314, "correlation": 0.10606259213768955, "sql": "" }, @@ -1058,28 +1058,28 @@ "name": "amount", "index": 12, "target": "default", - "importance": 0.07929936879044087, + "importance": 
0.07789152304441642, "correlation": 0.06660193285904141, "sql": "" } }, "scores": [ { - "date_time": "2025-08-19T22:33:06", + "date_time": "2025-08-22T16:36:11", "set_used": "train", "target": "default", - "accuracy": 0.9825708061002179, - "auc": 0.9952295229522934, - "cross_entropy": 0.08200917844672241, + "accuracy": 0.9891067538126361, + "auc": 0.9982898289828985, + "cross_entropy": 0.07583523368134122, "type": "classification" }, { - "date_time": "2025-08-19T22:33:07", + "date_time": "2025-08-22T16:36:11", "set_used": "test", "target": "default", - "accuracy": 0.9551569506726457, - "auc": 0.8903818953323912, - "cross_entropy": 0.1751293856880503, + "accuracy": 0.9596412556053812, + "auc": 0.9245638849599219, + "cross_entropy": 0.15581770550714213, "type": "classification" } ] diff --git a/tests/integration/data/loans/loans.py b/tests/integration/data/loans/loans.py index f1f5d0e..7202c5c 100644 --- a/tests/integration/data/loans/loans.py +++ b/tests/integration/data/loans/loans.py @@ -21,7 +21,7 @@ ) -class LoansProject(GetMLProject): +class LoansProject(GetMLProject, frozen=True): pass diff --git a/tests/integration/data/numerical/expected.container.json b/tests/integration/data/numerical/expected.container.json index 1b4d6a6..5ce170d 100644 --- a/tests/integration/data/numerical/expected.container.json +++ b/tests/integration/data/numerical/expected.container.json @@ -5,7 +5,7 @@ "perph": { "name": "perph", "path": "container/peripheral/perph.parquet", - "column_profile": { + "columns": { "time_stamp": { "name": "time_stamp", "role": "time_stamp", @@ -62,7 +62,7 @@ "train": { "name": "train", "path": "container/subsets/train.parquet", - "column_profile": { + "columns": { "time_stamp": { "name": "time_stamp", "role": "time_stamp", @@ -135,7 +135,7 @@ "test": { "name": "test", "path": "container/subsets/test.parquet", - "column_profile": { + "columns": { "time_stamp": { "name": "time_stamp", "role": "time_stamp", diff --git 
a/tests/integration/data/numerical/expected.pipeline.json b/tests/integration/data/numerical/expected.pipeline.json index e13289d..1c35c59 100644 --- a/tests/integration/data/numerical/expected.pipeline.json +++ b/tests/integration/data/numerical/expected.pipeline.json @@ -1,16 +1,17 @@ { - "id": "td6iXW", + "id": "JPoiQ7", "predictions": { "train": { "name": "prediction.train", "path": "pipeline/predictions/prediction.train.parquet", - "column_profile": { - "0": { - "name": "0", - "role": "unused_float", + "columns": { + "targets": { + "name": "targets", + "role": "target", "statistics": { "count": 390, "approx_unique": 345, + "null_percentage": 0.0, "avg": 96.17355588521713, "min": 0.19379276037216187, "max": 154.47401428222656, @@ -18,9 +19,8 @@ "q50": 113.03656747606065, "q75": 126.34682067871094, "std": 40.818051834384214, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } } } @@ -28,13 +28,14 @@ "test": { "name": "prediction.test", "path": "pipeline/predictions/prediction.test.parquet", - "column_profile": { - "0": { - "name": "0", - "role": "unused_float", + "columns": { + "targets": { + "name": "targets", + "role": "target", "statistics": { "count": 110, "approx_unique": 97, + "null_percentage": 0.0, "avg": 93.47890945889733, "min": 1.1542826890945435, "max": 148.96876525878906, @@ -42,9 +43,8 @@ "q50": 110.78643798828125, "q75": 124.41654968261719, "std": 39.87365738682376, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } } } @@ -54,20 +54,20 @@ "train": { "name": "features.train", "path": "pipeline/feature_sets/features.train.parquet", - "column_profile": { + "columns": { "time_stamp": { "name": "time_stamp", "role": "time_stamp", "statistics": { "count": 390, "approx_unique": 460, + "null_percentage": 0.0, "avg": "1970-01-01T00:00:00.514975", "min": "1970-01-01T00:00:00.000426", "max": "1970-01-01T00:00:00.998435", "q25": "1970-01-01T00:00:00.271797", "q50": 
"1970-01-01T00:00:00.525929", "q75": "1970-01-01T00:00:00.755042", - "null_percentage": 0.0, "column_type": "TIMESTAMP_NS", "type": "time_stamp" } @@ -78,9 +78,9 @@ "statistics": { "count": 390, "approx_unique": 359, + "null_percentage": 0.0, "min": "0", "max": "99", - "null_percentage": 0.0, "column_type": "VARCHAR", "type": "join_key" } @@ -91,6 +91,7 @@ "statistics": { "count": 390, "approx_unique": 155, + "null_percentage": 0.0, "avg": 96.17692307692307, "min": 0.0, "max": 155.0, @@ -98,7 +99,6 @@ "q50": 113.30555555555556, "q75": 126.4, "std": 40.834822842459396, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -108,15 +108,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 365, - "avg": 96.16907668121483, - "min": 0.0, - "max": 154.95853837278798, - "q25": 63.785815721758624, - "q50": 113.27765320314755, - "q75": 126.50463010411934, - "std": 40.83179514271078, + "approx_unique": 429, "null_percentage": 0.0, + "avg": 96.16907668121472, + "min": 0.0, + "max": 154.95853837279128, + "q25": 63.785815721749735, + "q50": 113.27765320316149, + "q75": 126.50463010414038, + "std": 40.83179514271471, "column_type": "DOUBLE", "type": "numerical" } @@ -126,15 +126,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 341, - "avg": 0.8376766309807187, - "min": -0.005741885826104295, - "max": 2.719408433664087, - "q25": 0.6520781997282549, - "q50": 0.9700795623830403, - "q75": 1.0064016977783699, - "std": 0.25570143183522287, + "approx_unique": 368, "null_percentage": 0.0, + "avg": 0.8376766309770867, + "min": -0.00574188576048959, + "max": 2.719408433652314, + "q25": 0.6520781997463054, + "q50": 0.9700795623593829, + "q75": 1.0064016977636985, + "std": 0.2557014318160599, "column_type": "DOUBLE", "type": "numerical" } @@ -144,15 +144,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 463, - "avg": 86.38617317829464, - "min": 0.0, - "max": 139.48311171571882, - "q25": 
57.23467415457054, - "q50": 101.87912254707474, - "q75": 113.60643119883412, - "std": 36.71267133509418, + "approx_unique": 401, "null_percentage": 0.0, + "avg": 86.38617317830524, + "min": 0.0, + "max": 139.48311171574812, + "q25": 57.23467415458568, + "q50": 101.87912254708868, + "q75": 113.60643119884278, + "std": 36.71267133509523, "column_type": "DOUBLE", "type": "numerical" } @@ -162,15 +162,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 353, - "avg": 1.4866715395011707, - "min": 0.0, - "max": 18.073525569347655, - "q25": 0.511371948685024, - "q50": 0.6982874132907706, - "q75": 1.4823021958627673, - "std": 2.4958715677815673, + "approx_unique": 410, "null_percentage": 0.0, + "avg": 1.486671539456154, + "min": 0.0, + "max": 18.073525569205152, + "q25": 0.5113719486477705, + "q50": 0.6982874132675109, + "q75": 1.4823021958122513, + "std": 2.4958715677642065, "column_type": "DOUBLE", "type": "numerical" } @@ -180,15 +180,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 354, - "avg": 78.06387842451866, - "min": 0.0, - "max": 125.92644437182243, - "q25": 51.62534776200333, - "q50": 92.20745748713837, - "q75": 102.69796763002303, - "std": 33.211534248078635, + "approx_unique": 501, "null_percentage": 0.0, + "avg": 78.06387842451753, + "min": 0.0, + "max": 125.92644437181266, + "q25": 51.62534776200353, + "q50": 92.20745748713694, + "q75": 102.69796763002027, + "std": 33.21153424807746, "column_type": "DOUBLE", "type": "numerical" } @@ -198,15 +198,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 461, - "avg": 0.5609713545719003, - "min": -1.8843295948944159, - "max": 7.386337073984908, - "q25": 0.31063646370630504, - "q50": 0.4926810309422083, - "q75": 0.5897466914903939, - "std": 1.0748256879378195, + "approx_unique": 347, "null_percentage": 0.0, + "avg": 0.5609713545760528, + "min": -1.884329594887943, + "max": 7.386337073977185, + "q25": 0.31063646371190884, + "q50": 
0.4926810309466812, + "q75": 0.5897466914943321, + "std": 1.074825687935717, "column_type": "DOUBLE", "type": "numerical" } @@ -216,15 +216,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 383, - "avg": 70.10412027365707, - "min": 0.0, - "max": 113.10321385165179, - "q25": 46.531787803724455, - "q50": 82.5588664868703, - "q75": 92.25996193854685, - "std": 29.74870027991256, + "approx_unique": 375, "null_percentage": 0.0, + "avg": 70.10412027365358, + "min": 0.0, + "max": 113.1032138516468, + "q25": 46.53178780372366, + "q50": 82.55886648686204, + "q75": 92.25996193855656, + "std": 29.748700279910278, "column_type": "DOUBLE", "type": "numerical" } @@ -234,15 +234,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 387, - "avg": 0.5140333259704128, - "min": -0.4440243790143118, - "max": 0.9493693945162422, - "q25": 0.40480853237326986, - "q50": 0.6491476769378332, - "q75": 0.6766508980594182, - "std": 0.25950606351142164, + "approx_unique": 360, "null_percentage": 0.0, + "avg": 0.5140333259451388, + "min": -0.44402437926430016, + "max": 0.9493693946581522, + "q25": 0.4048085323434744, + "q50": 0.6491476769307485, + "q75": 0.6766508980495518, + "std": 0.2595060635478067, "column_type": "DOUBLE", "type": "numerical" } @@ -252,15 +252,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 449, - "avg": 62.90321872320113, - "min": 0.0, - "max": 101.78880366743772, - "q25": 41.77890864641813, - "q50": 74.08221392645562, - "q75": 82.57089164833283, - "std": 26.68981566900884, + "approx_unique": 372, "null_percentage": 0.0, + "avg": 62.90321872321255, + "min": 0.0, + "max": 101.78880366745221, + "q25": 41.77890864642829, + "q50": 74.082213926468, + "q75": 82.57089164834547, + "std": 26.689815669012244, "column_type": "DOUBLE", "type": "numerical" } @@ -270,15 +270,15 @@ "role": "numerical", "statistics": { "count": 390, - "approx_unique": 383, - "avg": 0.49645826039061436, - "min": -0.4538093717154356, 
- "max": 8.224653499362855, - "q25": -0.013087906352437301, - "q50": 0.16252383066178144, - "q75": 0.3213966011730961, - "std": 1.3656064778513446, + "approx_unique": 374, "null_percentage": 0.0, + "avg": 0.4964582603871059, + "min": -0.45380937171733143, + "max": 8.224653499320418, + "q25": -0.013087906355294835, + "q50": 0.16252383066014384, + "q75": 0.3213966011735171, + "std": 1.3656064778457808, "column_type": "DOUBLE", "type": "numerical" } @@ -289,6 +289,7 @@ "statistics": { "count": 390, "approx_unique": 366, + "null_percentage": 0.0, "avg": 0.007274057420148274, "min": -0.9999830328625017, "max": 0.9963392528271888, @@ -296,7 +297,6 @@ "q50": 0.007745629121322521, "q75": 0.5430378770360742, "std": 0.5852675384296699, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -306,20 +306,20 @@ "test": { "name": "features.test", "path": "pipeline/feature_sets/features.test.parquet", - "column_profile": { + "columns": { "time_stamp": { "name": "time_stamp", "role": "time_stamp", "statistics": { "count": 110, "approx_unique": 111, + "null_percentage": 0.0, "avg": "1970-01-01T00:00:00.512566", "min": "1970-01-01T00:00:00.005357", "max": "1970-01-01T00:00:00.999120", "q25": "1970-01-01T00:00:00.265168", "q50": "1970-01-01T00:00:00.526890", "q75": "1970-01-01T00:00:00.760226", - "null_percentage": 0.0, "column_type": "TIMESTAMP_NS", "type": "time_stamp" } @@ -330,9 +330,9 @@ "statistics": { "count": 110, "approx_unique": 116, + "null_percentage": 0.0, "min": "10", "max": "97", - "null_percentage": 0.0, "column_type": "VARCHAR", "type": "join_key" } @@ -343,6 +343,7 @@ "statistics": { "count": 110, "approx_unique": 66, + "null_percentage": 0.0, "avg": 93.48181818181818, "min": 1.0, "max": 150.0, @@ -350,7 +351,6 @@ "q50": 110.5, "q75": 125.0, "std": 39.905331049938304, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -360,15 +360,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 116, - "avg": 
93.45267098578368, - "min": 1.000458133699219, - "max": 150.31561443814397, - "q25": 61.99609471912092, - "q50": 110.66337045642251, - "q75": 124.18593897756794, - "std": 39.90174967625208, + "approx_unique": 93, "null_percentage": 0.0, + "avg": 93.4526709857843, + "min": 1.000458133698913, + "max": 150.3156144381633, + "q25": 61.99609471911365, + "q50": 110.66337045640202, + "q75": 124.18593897769757, + "std": 39.90174967625765, "column_type": "DOUBLE", "type": "numerical" } @@ -378,15 +378,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 99, - "avg": 0.8371683464659075, - "min": 0.2616188358978256, - "max": 1.3008994594821477, - "q25": 0.6463480307323424, - "q50": 0.9769794994299659, - "q75": 0.9981102813377745, - "std": 0.22694451768807689, + "approx_unique": 103, "null_percentage": 0.0, + "avg": 0.8371683464618872, + "min": 0.261618835992537, + "max": 1.3008994593781122, + "q25": 0.6463480307642926, + "q50": 0.976979499407483, + "q75": 0.9981102813191179, + "std": 0.2269445176613666, "column_type": "DOUBLE", "type": "numerical" } @@ -396,15 +396,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 128, - "avg": 83.92568798146797, - "min": 0.8936319178550347, - "max": 135.34260543221382, - "q25": 55.618658113988545, - "q50": 99.41231630965478, - "q75": 112.10778547545671, - "std": 35.86158988509899, + "approx_unique": 110, "null_percentage": 0.0, + "avg": 83.92568798147742, + "min": 0.8936319178554479, + "max": 135.34260543222547, + "q25": 55.61865811400213, + "q50": 99.41231630965724, + "q75": 112.10778547547243, + "std": 35.86158988509892, "column_type": "DOUBLE", "type": "numerical" } @@ -414,15 +414,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 83, - "avg": 1.372963734240981, - "min": 0.26150392327196315, - "max": 14.74112345108553, - "q25": 0.5079183381167619, - "q50": 0.733030259305973, - "q75": 1.559876292211866, - "std": 2.1587383317179856, + "approx_unique": 110, 
"null_percentage": 0.0, + "avg": 1.372963734195574, + "min": 0.2615039232158267, + "max": 14.741123450963629, + "q25": 0.5079183380798892, + "q50": 0.733030259289265, + "q75": 1.5598762921426226, + "std": 2.158738331703888, "column_type": "DOUBLE", "type": "numerical" } @@ -432,15 +432,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 107, - "avg": 75.84121197115796, - "min": 0.8078674007376269, - "max": 122.03529180788084, - "q25": 50.16882583040319, - "q50": 90.30250196991355, - "q75": 101.01097835627817, - "std": 32.43788489337733, + "approx_unique": 104, "null_percentage": 0.0, + "avg": 75.8412119711572, + "min": 0.8078674007376313, + "max": 122.03529180787471, + "q25": 50.168825830403655, + "q50": 90.30250196992164, + "q75": 101.0109783562725, + "std": 32.437884893376435, "column_type": "DOUBLE", "type": "numerical" } @@ -450,15 +450,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 97, - "avg": 0.5049229013302758, - "min": -1.0234322123289814, - "max": 6.118363465727491, - "q25": 0.14283511563451265, - "q50": 0.48036527472462065, - "q75": 0.5681416543552915, - "std": 1.0283977404603937, + "approx_unique": 111, "null_percentage": 0.0, + "avg": 0.5049229013345454, + "min": -1.0234322123228132, + "max": 6.118363465720408, + "q25": 0.1428351156359696, + "q50": 0.48036527472915824, + "q75": 0.5681416543583435, + "std": 1.0283977404582119, "column_type": "DOUBLE", "type": "numerical" } @@ -468,15 +468,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 116, - "avg": 68.11771275543998, - "min": 0.727673394850341, - "max": 109.66181307292277, - "q25": 45.197855854500325, - "q50": 80.46348015049212, - "q75": 90.9621207543436, - "std": 29.061616382311644, + "approx_unique": 109, "null_percentage": 0.0, + "avg": 68.11771275543538, + "min": 0.727673394850336, + "max": 109.66181307295248, + "q25": 45.1978558544996, + "q50": 80.46348015049125, + "q75": 90.96212075432814, + "std": 
29.061616382308916, "column_type": "DOUBLE", "type": "numerical" } @@ -486,15 +486,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 86, - "avg": 0.49790529198137257, - "min": -0.7685654995270176, - "max": 0.8965485719768319, - "q25": 0.3140667507684413, - "q50": 0.6465958189822099, - "q75": 0.6755108754950195, - "std": 0.2743109041542205, + "approx_unique": 130, "null_percentage": 0.0, + "avg": 0.4979052919543617, + "min": -0.7685654998274901, + "max": 0.8965485720180131, + "q25": 0.3140667507413654, + "q50": 0.6465958189744161, + "q75": 0.6755108754861249, + "std": 0.27431090419339493, "column_type": "DOUBLE", "type": "numerical" } @@ -504,15 +504,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 110, - "avg": 61.12278600295826, - "min": 0.6534488975859953, - "max": 98.15463652420087, - "q25": 40.59131151509016, - "q50": 71.44444119849189, - "q75": 81.54953124371187, - "std": 26.076794807984033, + "approx_unique": 116, "null_percentage": 0.0, + "avg": 61.122786002969285, + "min": 0.6534488975862314, + "max": 98.15463652421487, + "q25": 40.59131151510061, + "q50": 71.4444411985032, + "q75": 81.54953124372418, + "std": 26.076794807987245, "column_type": "DOUBLE", "type": "numerical" } @@ -522,15 +522,15 @@ "role": "numerical", "statistics": { "count": 110, - "approx_unique": 120, - "avg": 0.3782221036628038, - "min": -0.9960609366987908, - "max": 6.973058442492447, - "q25": -0.025090661724871316, - "q50": 0.1645438387261663, - "q75": 0.3035763747028691, - "std": 1.1509689359209772, + "approx_unique": 127, "null_percentage": 0.0, + "avg": 0.378222103659772, + "min": -0.9960609366958564, + "max": 6.973058442459302, + "q25": -0.025090661729046643, + "q50": 0.16454383872598383, + "q75": 0.3035763747020321, + "std": 1.1509689359165514, "column_type": "DOUBLE", "type": "numerical" } @@ -541,6 +541,7 @@ "statistics": { "count": 110, "approx_unique": 117, + "null_percentage": 0.0, "avg": 0.05299560208754448, "min": 
-0.953856078472245, "max": 0.9980389778878485, @@ -548,7 +549,6 @@ "q50": 0.06708147454405011, "q75": 0.5614427664757271, "std": 0.5410740412382254, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -570,19 +570,19 @@ "num_threads": 0, "propositionalization": { "aggregation": [ - "MEDIAN", + "COUNT DISTINCT", "STDDEV", - "COUNT", "AVG", - "TREND", - "COUNT DISTINCT", - "SUM", "FIRST", - "MODE", - "LAST", + "MEDIAN", + "COUNT", "COUNT MINUS COUNT DISTINCT", - "MAX", - "MIN" + "LAST", + "MIN", + "MODE", + "TREND", + "SUM", + "MAX" ], "delta_t": 0.0, "loss_function": "SquareLoss", @@ -734,80 +734,80 @@ "index": 0, "target": "targets", "importance": 0.9855079335297239, - "correlation": 0.9999677553832725, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_1\";\n\nCREATE TABLE \"FEATURE_1_1\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498925 ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.504473 ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * -0.0002825082677636587 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * -0.007415503263206161 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.003826871192511936 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * 0.0122621884520277 + 8.9312536387801140e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498925 ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.504473 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * 0.05804418805774981 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * -0.2861045206793753 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * -0.0653957309758228 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * 0.3191843723547194 + 2.3485154307641487e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498925 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"column_01\" > -0.921696 ) THEN 
COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * 0.0002807963805906742 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 0.009555970634299542 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.001445637494295812 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -0.0186912319484488 + 9.9855091184214373e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498925 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"column_01\" <= -0.921696 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * -0.02214564726962967 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 0.03800937692930173 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.09246878641979493 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -0.06194856794978174 + 9.8800776047156758e-01\n ELSE NULL\n END\n) AS \"feature_1_1\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": 0.9999677553832697, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_1\";\n\nCREATE TABLE \"FEATURE_1_1\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498925 ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.504473 ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * -0.000282508267685821 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * -0.007415503257972272 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.003826871192528156 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * 0.01226218844399789 + 8.9312536356660269e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498925 ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.504473 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * 
0.05804418805569061 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * -0.2861045210632998 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * -0.06539573097916862 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * 0.3191843727253872 + 2.3485154319764165e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498925 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"column_01\" > -0.921696 ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * 0.0002807963805732821 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 0.009555970635634689 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.001445637494257335 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -0.018691231950002 + 9.9855091184212608e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498925 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"column_01\" <= -0.921696 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * -0.02214564726935706 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 0.03800937692744789 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.09246878641318639 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -0.06194856794735355 + 9.8800776047149863e-01\n ELSE NULL\n END\n) AS \"feature_1_1\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "feature_1_2": { "name": "feature_1_2", "index": 1, "target": "targets", "importance": 3.235278199916987e-6, - "correlation": -0.45903221957864576, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_2\";\n\nCREATE TABLE \"FEATURE_1_2\" AS\nSELECT AVG( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498925 ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.504473 ) THEN COALESCE( t1.\"column_01\" - 
-0.3476693939772724, 0.0 ) * 0.06609900457169174 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * -0.6295295575571463 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.2835999498687981 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -0.02637379609146044 + 1.8258162605335087e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498925 ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.504473 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * 7.128685991272434 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 17.84616914426364 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * -9.164770141676748 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -11.62938113588722 + 1.1385935496578947e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498925 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"column_01\" > -0.921696 ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * 0.006843251806502547 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 0.4975218568631604 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.02294642167405772 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -0.7527161752946725 + 9.9922470077978554e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498925 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"column_01\" <= -0.921696 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * -0.3390880692579381 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 4.603157429459366 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 11.77286124731349 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -5.099763745399616 + 1.0007022964293952e+00\n ELSE NULL\n END\n) AS \"feature_1_2\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN 
\"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": -0.45903221959203583, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_2\";\n\nCREATE TABLE \"FEATURE_1_2\" AS\nSELECT AVG( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498925 ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.504473 ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * 0.06609900457712579 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * -0.6295295572307659 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.283599949899933 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -0.02637379632213627 + 1.8258162596392789e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498925 ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.504473 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * 7.128685991173269 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 17.84616912007614 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * -9.164770141652454 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -11.62938112078191 + 1.1385935500157862e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498925 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"column_01\" > -0.921696 ) THEN COALESCE( t1.\"column_01\" - -0.3476693939772724, 0.0 ) * 0.006843251805356447 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 0.4975218568619488 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 0.02294642167257786 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -0.7527161752303986 + 9.9922470077969716e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498925 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"column_01\" <= -0.921696 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - 
-0.3476693939772724, 0.0 ) * -0.339088069307522 + COALESCE( t1.\"time_stamp\" - 0.6256710322580648, 0.0 ) * 4.603157429553225 + COALESCE( t2.\"column_01\" - 0.01279470270620846, 0.0 ) * 11.77286124710395 + COALESCE( t2.\"time_stamp\" - 0.3708535386317902, 0.0 ) * -5.099763745384771 + 1.0007022964289838e+00\n ELSE NULL\n END\n) AS \"feature_1_2\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "feature_1_3": { "name": "feature_1_3", "index": 2, "target": "targets", "importance": 0.007550877454312558, - "correlation": 0.9999462263425182, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_3\";\n\nCREATE TABLE \"FEATURE_1_3\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501775 ) AND ( t1.\"column_01\" > 0.856386 ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 0.08445650826980627 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 0.3550331227713587 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * 0.03268783427556191 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * -0.5264636906842577 + -3.0307853756374431e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501775 ) AND ( t1.\"column_01\" <= 0.856386 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 0.001597353554398187 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 0.03657358064072273 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * 0.0009340103362134945 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * -0.02896138146352568 + -3.0564557237884381e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501775 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498409 ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 0.01804744327796228 + 
COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 4.674703457895101 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -0.1454134563545548 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * -4.097979101666891 + -7.8981906116152012e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501775 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498409 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * -0.0007849000795266131 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * -0.005421658973588547 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -0.002308417886658116 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * 0.02826561511005675 + 9.0185439055969818e-01\n ELSE NULL\n END\n) AS \"feature_1_3\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": 0.999946226342519, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_3\";\n\nCREATE TABLE \"FEATURE_1_3\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501775 ) AND ( t1.\"column_01\" > 0.856386 ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 0.08445650826962771 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 0.3550331227683526 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * 0.03268783427551198 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * -0.5264636906796042 + -3.0307853756206971e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501775 ) AND ( t1.\"column_01\" <= 0.856386 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 0.001597353554397896 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 0.03657358063967483 + COALESCE( t2.\"column_01\" 
- 0.0150488310381689, 0.0 ) * 0.000934010336265935 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * -0.0289613814619177 + -3.0564557237572624e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501775 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498409 ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 0.01804744326728043 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 4.67470345827746 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -0.1454134563580121 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * -4.097979102011514 + -7.8981906127564594e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501775 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498409 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * -0.0007849000794958028 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * -0.005421658974806012 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -0.002308417886516971 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * 0.02826561511172634 + 9.0185439055992067e-01\n ELSE NULL\n END\n) AS \"feature_1_3\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "feature_1_4": { "name": "feature_1_4", "index": 3, "target": "targets", "importance": 8.533660221786729e-6, - "correlation": 0.3077574208172454, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_4\";\n\nCREATE TABLE \"FEATURE_1_4\" AS\nSELECT AVG( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501775 ) AND ( t1.\"column_01\" > 0.765917 ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 14.03809250481727 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 
0.0 ) * 40.57658306419481 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * 24.64387486121828 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * 14.70945117299917 + -1.3055442491942787e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501775 ) AND ( t1.\"column_01\" <= 0.765917 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 0.3245142029734437 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 6.166202794466526 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -0.5179467504298221 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * 1.178685664959367 + -1.6388722882609414e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501775 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498409 ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 6.342664729713697 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 202.954310079599 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -18.83705142814225 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * -128.5493277818914 + 2.8962240854574806e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501775 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498409 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * -0.01298483961864202 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * -0.9633164199912341 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -0.1539160834850153 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * 3.190234334521742 + 8.9814202462828985e-01\n ELSE NULL\n END\n) AS \"feature_1_4\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= 
t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": 0.30775742081927077, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_4\";\n\nCREATE TABLE \"FEATURE_1_4\" AS\nSELECT AVG( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501775 ) AND ( t1.\"column_01\" > 0.765917 ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 14.03809250474164 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 40.57658306358533 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * 24.64387486116014 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * 14.70945117346453 + -1.3055442491838373e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501775 ) AND ( t1.\"column_01\" <= 0.765917 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 0.3245142029727683 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 6.166202793989797 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -0.517946750411035 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * 1.178685665522778 + -1.6388722882052752e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501775 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498409 ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * 6.342664729741755 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * 202.9543100852445 + COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -18.83705142830901 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * -128.549327784358 + 2.8962240853847226e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501775 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498409 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1554697090156631, 0.0 ) * -0.012984839620059 + COALESCE( t1.\"time_stamp\" - 0.5534843333333335, 0.0 ) * -0.9633164200364074 + 
COALESCE( t2.\"column_01\" - 0.0150488310381689, 0.0 ) * -0.1539160834834402 + COALESCE( t2.\"time_stamp\" - 0.3408921897827647, 0.0 ) * 3.190234334759545 + 8.9814202462800707e-01\n ELSE NULL\n END\n) AS \"feature_1_4\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "feature_1_5": { "name": "feature_1_5", "index": 4, "target": "targets", "importance": 0.00005670347744705081, - "correlation": 0.9997852958452743, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_5\";\n\nCREATE TABLE \"FEATURE_1_5\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501743 ) AND ( t1.\"column_01\" > 0.831731 ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 0.08953914195307049 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * 0.07266643304116419 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * -0.02109235012870913 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * -0.1450644849730639 + -1.3228435587994120e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501743 ) AND ( t1.\"column_01\" <= 0.831731 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 0.001224244382021227 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * 0.02224869985962535 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * 0.007475664704203301 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * -0.02246737996915927 + -1.7288886029105145e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501743 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498453 ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 0.01473497402431667 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -3.67171733873037 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 
0.0 ) * 0.07565616902957738 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 3.756136203566701 + 1.6896615371518224e+00\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501743 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498453 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * -0.0006878528908862898 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -0.006825793080495507 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * -0.00141834710142976 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 0.01747351001934011 + 8.1129050420365478e-01\n ELSE NULL\n END\n) AS \"feature_1_5\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": 0.999785295845277, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_5\";\n\nCREATE TABLE \"FEATURE_1_5\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501743 ) AND ( t1.\"column_01\" > 0.831731 ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 0.08953914195278499 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * 0.07266643304147295 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * -0.02109235012904008 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * -0.1450644849734724 + -1.3228435587968082e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501743 ) AND ( t1.\"column_01\" <= 0.831731 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 0.001224244382022964 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * 0.02224869986017617 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * 0.007475664704182282 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 
-0.02246737996980406 + -1.7288886029206179e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501743 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498453 ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 0.01473497402563573 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -3.671717338708963 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * 0.07565616903077083 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 3.756136203536811 + 1.6896615371440473e+00\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501743 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498453 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * -0.0006878528908973636 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -0.006825793080319673 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * -0.001418347101456858 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 0.01747351001884504 + 8.1129050420361315e-01\n ELSE NULL\n END\n) AS \"feature_1_5\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "feature_1_6": { "name": "feature_1_6", "index": 5, "target": "targets", "importance": 2.4247201030086813e-6, - "correlation": 0.010240239170331641, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_6\";\n\nCREATE TABLE \"FEATURE_1_6\" AS\nSELECT AVG( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501743 ) AND ( t1.\"column_01\" > 0.831731 ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 21.46467875970271 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -22.11881555806786 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * 
-10.2375015576399 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 52.23159582549691 + -1.9053976101947895e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501743 ) AND ( t1.\"column_01\" <= 0.831731 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 0.0465685582484428 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * 0.4601144771700485 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * 1.868012102045002 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 4.065531584414509 + -2.3782339887040587e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501743 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498453 ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * -2.791649001040807 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -341.7583668518341 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * 19.28320620113372 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 180.1951572050513 + 1.0021678223934718e+00\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501743 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498453 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * -0.011965661142605 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -0.4234434507345259 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * -0.04975855094688834 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 1.933400867438229 + 8.0785574938562288e-01\n ELSE NULL\n END\n) AS \"feature_1_6\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": 0.010240239169991854, + "sql": "DROP 
TABLE IF EXISTS \"FEATURE_1_6\";\n\nCREATE TABLE \"FEATURE_1_6\" AS\nSELECT AVG( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501743 ) AND ( t1.\"column_01\" > 0.831731 ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 21.46467875959469 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -22.118815557859 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * -10.2375015576486 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 52.23159582506085 + -1.9053976102095270e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501743 ) AND ( t1.\"column_01\" <= 0.831731 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * 0.04656855824781388 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * 0.4601144772129166 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * 1.868012102048024 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 4.065531584344288 + -2.3782339887221280e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501743 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498453 ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * -2.791649000796464 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -341.7583668521192 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * 19.28320620128335 + COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 180.1951572048963 + 1.0021678223933099e+00\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501743 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498453 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.4063565992022968, 0.0 ) * -0.01196566114318455 + COALESCE( t1.\"time_stamp\" - 0.5375246333333336, 0.0 ) * -0.4234434507238579 + COALESCE( t2.\"column_01\" - 0.006255480359208636, 0.0 ) * -0.04975855094623178 + 
COALESCE( t2.\"time_stamp\" - 0.3312582032343706, 0.0 ) * 1.93340086740199 + 8.0785574938563665e-01\n ELSE NULL\n END\n) AS \"feature_1_6\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "feature_1_7": { "name": "feature_1_7", "index": 6, "target": "targets", "importance": 0.006753733723576533, - "correlation": 0.9999556424041055, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_7\";\n\nCREATE TABLE \"FEATURE_1_7\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501651 ) AND ( t2.\"column_01\" > -0.846143 ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * -0.001493861933905013 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * -0.007182437891560771 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * 0.02489841822192869 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * -0.007455280418811005 + -7.7090585490841215e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501651 ) AND ( t2.\"column_01\" <= -0.846143 OR t2.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * 0.02191841441823272 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 0.2288513466118687 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * 0.2152923315884198 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * -0.05725722024827516 + 1.6274132167775598e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501651 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498474 ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * -0.1285670433349319 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 0.09380642781832983 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * -0.04844798227211269 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 
0.0 ) * 0.02041704015912599 + 4.2353371621539826e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501651 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498474 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * 6.132603747064745e-05 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * -0.004978033261172977 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * -0.003803004420420616 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * 0.008848936152593714 + 7.2967626442543165e-01\n ELSE NULL\n END\n) AS \"feature_1_7\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": 0.999955642404105, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_7\";\n\nCREATE TABLE \"FEATURE_1_7\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501651 ) AND ( t2.\"column_01\" > -0.846143 ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * -0.001493861933643545 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * -0.007182437898005385 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * 0.0248984182219779 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * -0.00745528041535602 + -7.7090585466409551e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501651 ) AND ( t2.\"column_01\" <= -0.846143 OR t2.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * 0.02191841441560858 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 0.2288513466714222 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * 0.2152923316731586 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * -0.0572572202567467 + 1.6274132173844469e-01\n WHEN ( t1.\"time_stamp\" - 
t2.\"time_stamp\" <= 0.501651 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498474 ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * -0.1285670433361769 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 0.09380642786771355 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * -0.04844798227084408 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * 0.02041704011971352 + 4.2353371620122088e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501651 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498474 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * 6.132603745732198e-05 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * -0.004978033261306847 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * -0.003803004420437763 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * 0.008848936152713826 + 7.2967626442540678e-01\n ELSE NULL\n END\n) AS \"feature_1_7\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "feature_1_8": { "name": "feature_1_8", "index": 7, "target": "targets", "importance": 2.500948473796868e-6, - "correlation": -0.4540189912321207, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_8\";\n\nCREATE TABLE \"FEATURE_1_8\" AS\nSELECT AVG( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501651 ) AND ( t2.\"column_01\" > -0.846143 ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * -0.5213707574939598 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 2.858497364273565 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * 4.98967581918408 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * 
-2.454680877407986 + -1.3695016487991716e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501651 ) AND ( t2.\"column_01\" <= -0.846143 OR t2.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * 5.163584444329135 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 3.642674764502531 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * 29.69273767381162 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * -15.36350301350208 + 1.5193268230959023e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501651 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498474 ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * -17.85508990100138 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 11.21012552801582 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * -8.582013412185995 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * -7.359260072650577 + 4.2183749315466107e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501651 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498474 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * 0.02074566194157262 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * -0.5384577190357468 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * -0.1143310783423644 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * 1.106283632734646 + 7.2873366938851347e-01\n ELSE NULL\n END\n) AS \"feature_1_8\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": -0.454018991207604, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_8\";\n\nCREATE TABLE \"FEATURE_1_8\" AS\nSELECT AVG( \n 
CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501651 ) AND ( t2.\"column_01\" > -0.846143 ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * -0.5213707574853998 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 2.858497365276426 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * 4.989675819181841 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * -2.45468087776976 + -1.3695016489779270e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501651 ) AND ( t2.\"column_01\" <= -0.846143 OR t2.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * 5.163584444202168 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 3.642674753706717 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * 29.69273767691739 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * -15.36350301404135 + 1.5193268233693011e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501651 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498474 ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * -17.85508990068798 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * 11.21012552256822 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * -8.582013412028223 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * -7.359260070109665 + 4.2183749316339209e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501651 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498474 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.2401549331032501, 0.0 ) * 0.02074566194006722 + COALESCE( t1.\"time_stamp\" - 0.6052577878787879, 0.0 ) * -0.5384577190447076 + COALESCE( t2.\"column_01\" - 0.01744899727986292, 0.0 ) * -0.1143310783406135 + COALESCE( t2.\"time_stamp\" - 0.3586735348654483, 0.0 ) * 1.106283632790235 + 
7.2873366938843842e-01\n ELSE NULL\n END\n) AS \"feature_1_8\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "feature_1_9": { "name": "feature_1_9", "index": 8, "target": "targets", "importance": 0.00011069700912831822, - "correlation": 0.9998800147496907, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_9\";\n\nCREATE TABLE \"FEATURE_1_9\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501842 ) AND ( t1.\"column_01\" > 0.506598 ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 0.02806341807642407 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * 0.1005855667727084 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * 0.05043917424967326 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * -0.211674367137595 + -1.1278947500145307e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501842 ) AND ( t1.\"column_01\" <= 0.506598 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 0.001164477789418083 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * -0.007312218559158858 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * -0.01802190327160747 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 0.02071044264103949 + -5.5907828958983632e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501842 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498441 ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 0.03602918186805804 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * 0.5558772346193536 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * 0.004575154971044848 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 0.04365762583061258 + 2.5574551943952029e-01\n WHEN ( 
t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501842 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498441 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * -0.0003786663742407553 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * -0.01175584053811839 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * -0.001011664105964864 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 0.03034975195777023 + 6.5809460173427137e-01\n ELSE NULL\n END\n) AS \"feature_1_9\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": 0.9998800147496908, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_9\";\n\nCREATE TABLE \"FEATURE_1_9\" AS\nSELECT SUM( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501842 ) AND ( t1.\"column_01\" > 0.506598 ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 0.02806341807644875 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * 0.1005855667731265 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * 0.0504391742493767 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * -0.2116743671383328 + -1.1278947500164904e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501842 ) AND ( t1.\"column_01\" <= 0.506598 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 0.001164477789426901 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * -0.007312218559007408 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * -0.0180219032715451 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 0.02071044264087463 + -5.5907828959129019e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501842 OR t1.\"time_stamp\" IS NULL OR 
t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498441 ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 0.03602918187259455 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * 0.5558772346075392 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * 0.004575154971283778 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 0.04365762584271543 + 2.5574551944266977e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501842 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498441 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * -0.0003786663742680087 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * -0.01175584053834853 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * -0.001011664105895818 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 0.03034975195783616 + 6.5809460173436929e-01\n ELSE NULL\n END\n) AS \"feature_1_9\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "feature_1_10": { "name": "feature_1_10", "index": 9, "target": "targets", "importance": 2.2158050428138524e-6, - "correlation": 0.30509226848518806, - "sql": "DROP TABLE IF EXISTS \"FEATURE_1_10\";\n\nCREATE TABLE \"FEATURE_1_10\" AS\nSELECT AVG( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501842 ) AND ( t1.\"column_01\" > 0.643560 ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 9.877188827889023 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * 12.01459646074551 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * 8.876757988330217 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 7.40363324168734 + -7.3226440490680805e-02\n WHEN ( t1.\"time_stamp\" - 
t2.\"time_stamp\" > 0.501842 ) AND ( t1.\"column_01\" <= 0.643560 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 0.1101768936585742 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * -1.022134954047455 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * -3.906771442750788 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 4.599728380986098 + -5.0424784350053672e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501842 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498441 ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 14.83783889032573 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * 14.68937003402948 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * 5.614331332820621 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 81.73119784024503 + 4.0128967526428971e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501842 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498441 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * -0.02393403673832931 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * -0.8606425343024822 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * -0.0389454131820322 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 3.924829063046743 + 6.5128984222492303e-01\n ELSE NULL\n END\n) AS \"feature_1_10\",\n t1.rowid AS rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" + "correlation": 0.30509226848755383, + "sql": "DROP TABLE IF EXISTS \"FEATURE_1_10\";\n\nCREATE TABLE \"FEATURE_1_10\" AS\nSELECT AVG( \n CASE\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501842 ) AND 
( t1.\"column_01\" > 0.643560 ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 9.877188827855255 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * 12.01459646065051 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * 8.876757988286263 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 7.403633241716817 + -7.3226440490337677e-02\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.501842 ) AND ( t1.\"column_01\" <= 0.643560 OR t1.\"column_01\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 0.110176893658398 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * -1.022134954043423 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * -3.906771442738906 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 4.599728380985646 + -5.0424784350230683e-03\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501842 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" > 0.498441 ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * 14.83783889044675 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * 14.68937003267403 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * 5.6143313328757 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 81.73119784028188 + 4.0128967526534398e-01\n WHEN ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.501842 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) AND ( t1.\"time_stamp\" - t2.\"time_stamp\" <= 0.498441 OR t1.\"time_stamp\" IS NULL OR t2.\"time_stamp\" IS NULL ) THEN COALESCE( t1.\"column_01\" - -0.1136283210506594, 0.0 ) * -0.02393403673893582 + COALESCE( t1.\"time_stamp\" - 0.6267969285714285, 0.0 ) * -0.8606425342940097 + COALESCE( t2.\"column_01\" - 0.005701637464124793, 0.0 ) * -0.03894541318073007 + COALESCE( t2.\"time_stamp\" - 0.3721112498869289, 0.0 ) * 3.924829063050257 + 6.5128984222487241e-01\n ELSE NULL\n END\n) AS \"feature_1_10\",\n t1.rowid AS 
rownum\nFROM \"POPULATION__STAGING_TABLE_1\" t1\nINNER JOIN \"PERPH__STAGING_TABLE_2\" t2\nON t1.\"join_key\" = t2.\"join_key\"\nWHERE t2.\"time_stamp\" <= t1.\"time_stamp\"\nGROUP BY t1.rowid;" }, "column_01": { "name": "column_01", @@ -820,7 +820,7 @@ }, "scores": [ { - "date_time": "2025-08-19T22:33:16", + "date_time": "2025-08-22T16:36:20", "set_used": "train", "target": "targets", "mae": 0.16209001296605818, @@ -829,7 +829,7 @@ "type": "regression" }, { - "date_time": "2025-08-19T22:33:18", + "date_time": "2025-08-22T16:36:22", "set_used": "test", "target": "targets", "mae": 0.47556934465061534, diff --git a/tests/integration/data/numerical/numerical.py b/tests/integration/data/numerical/numerical.py index cdd5344..3fb95f9 100644 --- a/tests/integration/data/numerical/numerical.py +++ b/tests/integration/data/numerical/numerical.py @@ -23,7 +23,7 @@ ) -class NumericalProject(GetMLProject): +class NumericalProject(GetMLProject, frozen=True): pass diff --git a/tests/integration/data/robot/expected.container.json b/tests/integration/data/robot/expected.container.json index dadde8b..a34f846 100644 --- a/tests/integration/data/robot/expected.container.json +++ b/tests/integration/data/robot/expected.container.json @@ -5,7 +5,7 @@ "full": { "name": "full", "path": "container/peripheral/full.parquet", - "column_profile": { + "columns": { "rowid": { "name": "rowid", "role": "time_stamp", @@ -1759,7 +1759,7 @@ "train": { "name": "full", "path": "container/subsets/train.full.parquet", - "column_profile": { + "columns": { "rowid": { "name": "rowid", "role": "time_stamp", @@ -3511,7 +3511,7 @@ "validation": { "name": "full", "path": "container/subsets/validation.full.parquet", - "column_profile": { + "columns": { "rowid": { "name": "rowid", "role": "time_stamp", @@ -5263,7 +5263,7 @@ "test": { "name": "full", "path": "container/subsets/test.full.parquet", - "column_profile": { + "columns": { "rowid": { "name": "rowid", "role": "time_stamp", diff --git 
a/tests/integration/data/robot/expected.pipeline.json b/tests/integration/data/robot/expected.pipeline.json index 84ad49d..cce0b02 100644 --- a/tests/integration/data/robot/expected.pipeline.json +++ b/tests/integration/data/robot/expected.pipeline.json @@ -1,16 +1,17 @@ { - "id": "XLdkKu", + "id": "cDvmF1", "predictions": { "train": { "name": "prediction.train", "path": "pipeline/predictions/prediction.train.parquet", - "column_profile": { - "0": { - "name": "0", - "role": "unused_float", + "columns": { + "f_x": { + "name": "f_x", + "role": "target", "statistics": { "count": 90, "approx_unique": 92, + "null_percentage": 0.0, "avg": -10.733253468407526, "min": -11.220943450927734, "max": -10.405853271484375, @@ -18,17 +19,17 @@ "q50": -10.71152925491333, "q75": -10.611865997314453, "std": 0.17923356795586112, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } }, - "1": { - "name": "1", - "role": "unused_float", + "f_y": { + "name": "f_y", + "role": "target", "statistics": { "count": 90, "approx_unique": 96, + "null_percentage": 0.0, "avg": 6.420799822277493, "min": 5.9611358642578125, "max": 6.9497480392456055, @@ -36,17 +37,17 @@ "q50": 6.429170370101929, "q75": 6.589086055755615, "std": 0.23492959994799614, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } }, - "2": { - "name": "2", - "role": "unused_float", + "f_z": { + "name": "f_z", + "role": "target", "statistics": { "count": 90, "approx_unique": 89, + "null_percentage": 0.0, "avg": -7.648273118336996, "min": -7.977388381958008, "max": -7.296791076660156, @@ -54,9 +55,8 @@ "q50": -7.645588159561157, "q75": -7.6043314933776855, "std": 0.11707979723485837, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } } } @@ -64,13 +64,14 @@ "validation": { "name": "prediction.validation", "path": "pipeline/predictions/prediction.validation.parquet", - "column_profile": { - "0": { - 
"name": "0", - "role": "unused_float", + "columns": { + "f_x": { + "name": "f_x", + "role": "target", "statistics": { "count": 30, "approx_unique": 35, + "null_percentage": 0.0, "avg": -10.93016627629598, "min": -11.100025177001953, "max": -10.782144546508789, @@ -78,17 +79,17 @@ "q50": -10.914761066436768, "q75": -10.841729164123535, "std": 0.09749280830566094, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } }, - "1": { - "name": "1", - "role": "unused_float", + "f_y": { + "name": "f_y", + "role": "target", "statistics": { "count": 30, "approx_unique": 32, + "null_percentage": 0.0, "avg": 6.241030391057333, "min": 5.999167442321777, "max": 6.531711578369141, @@ -96,17 +97,17 @@ "q50": 6.203312873840332, "q75": 6.360936164855957, "std": 0.14808194665012037, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } }, - "2": { - "name": "2", - "role": "unused_float", + "f_z": { + "name": "f_z", + "role": "target", "statistics": { "count": 30, "approx_unique": 30, + "null_percentage": 0.0, "avg": -7.675311549504598, "min": -7.775376319885254, "max": -7.590496063232422, @@ -114,9 +115,8 @@ "q50": -7.686032295227051, "q75": -7.637664794921875, "std": 0.04378525558931166, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } } } @@ -124,13 +124,14 @@ "test": { "name": "prediction.test", "path": "pipeline/predictions/prediction.test.parquet", - "column_profile": { - "0": { - "name": "0", - "role": "unused_float", + "columns": { + "f_x": { + "name": "f_x", + "role": "target", "statistics": { "count": 30, "approx_unique": 30, + "null_percentage": 0.0, "avg": -10.998719056447348, "min": -11.182074546813965, "max": -10.69487476348877, @@ -138,17 +139,17 @@ "q50": -11.018784046173096, "q75": -10.933016777038574, "std": 0.10115848786445528, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } }, - "1": { 
- "name": "1", - "role": "unused_float", + "f_y": { + "name": "f_y", + "role": "target", "statistics": { "count": 30, "approx_unique": 25, + "null_percentage": 0.0, "avg": 6.590824826558431, "min": 6.305027008056641, "max": 6.856055736541748, @@ -156,17 +157,17 @@ "q50": 6.575701713562012, "q75": 6.6769700050354, "std": 0.13825323392743774, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } }, - "2": { - "name": "2", - "role": "unused_float", + "f_z": { + "name": "f_z", + "role": "target", "statistics": { "count": 30, "approx_unique": 27, + "null_percentage": 0.0, "avg": -7.63157286643982, "min": -7.698906421661377, "max": -7.564383506774902, @@ -174,9 +175,8 @@ "q50": -7.634854078292847, "q75": -7.6035566329956055, "std": 0.034576567990887985, - "null_percentage": 0.0, "column_type": "DOUBLE", - "type": "unused_float" + "type": "target" } } } @@ -186,13 +186,14 @@ "train": { "name": "features.train", "path": "pipeline/feature_sets/features.train.parquet", - "column_profile": { + "columns": { "f_x": { "name": "f_x", "role": "target", "statistics": { "count": 90, "approx_unique": 75, + "null_percentage": 0.0, "avg": -10.733866666666666, "min": -11.23, "max": -10.39, @@ -200,7 +201,6 @@ "q50": -10.706, "q75": -10.632, "std": 0.18424118291833055, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -211,6 +211,7 @@ "statistics": { "count": 90, "approx_unique": 67, + "null_percentage": 0.0, "avg": 6.4211588888888915, "min": 5.95, "max": 6.9718, @@ -218,7 +219,6 @@ "q50": 6.43725, "q75": 6.59, "std": 0.23847778305323794, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -229,6 +229,7 @@ "statistics": { "count": 90, "approx_unique": 61, + "null_percentage": 0.0, "avg": -7.6486744444444446, "min": -7.99, "max": -7.29, @@ -236,7 +237,6 @@ "q50": -7.641349999999999, "q75": -7.61, "std": 0.12120592891745888, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -247,6 
+247,7 @@ "statistics": { "count": 90, "approx_unique": 83, + "null_percentage": 0.0, "avg": 0.3029163688055355, "min": -0.31465415076773495, "max": 0.7827058558842352, @@ -254,7 +255,6 @@ "q50": 0.3249082843979988, "q75": 0.38590252715777634, "std": 0.16481064171623752, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -265,6 +265,7 @@ "statistics": { "count": 90, "approx_unique": 91, + "null_percentage": 0.0, "avg": 0.011959457306833126, "min": -0.48373951342150484, "max": 0.2027935216787802, @@ -272,7 +273,6 @@ "q50": 0.035454099024066446, "q75": 0.14938576206617865, "std": 0.14943260045296886, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -283,6 +283,7 @@ "statistics": { "count": 90, "approx_unique": 72, + "null_percentage": 0.0, "avg": 0.08407029635776446, "min": -0.2704927088884153, "max": 0.4940717531491363, @@ -290,7 +291,6 @@ "q50": 0.05748586711121888, "q75": 0.2299577280967114, "std": 0.17527049908816736, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -301,6 +301,7 @@ "statistics": { "count": 90, "approx_unique": 46, + "null_percentage": 0.0, "avg": 0.25188956172160787, "min": -0.23509573529982242, "max": 0.5382123577854343, @@ -308,7 +309,6 @@ "q50": 0.2930111528134115, "q75": 0.3550458832113746, "std": 0.1812398668302307, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -319,6 +319,7 @@ "statistics": { "count": 90, "approx_unique": 69, + "null_percentage": 0.0, "avg": 0.3927403093612645, "min": 0.17607518464691357, "max": 0.6725212867530115, @@ -326,7 +327,6 @@ "q50": 0.4038435104717734, "q75": 0.4451822390825191, "std": 0.11480776766812614, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -337,6 +337,7 @@ "statistics": { "count": 90, "approx_unique": 69, + "null_percentage": 0.0, "avg": -0.09227228754178138, "min": -0.4048465815826436, "max": 0.098293559970952, @@ -344,7 +345,6 @@ "q50": -0.0685528452025958, "q75": 
-0.002593265132116984, "std": 0.1165370944586005, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -355,6 +355,7 @@ "statistics": { "count": 90, "approx_unique": 62, + "null_percentage": 0.0, "avg": 1.2079429885282171, "min": 0.8159451174374466, "max": 1.5123557867430288, @@ -362,7 +363,6 @@ "q50": 1.2223826534027085, "q75": 1.2734256322534585, "std": 0.11607311125787807, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -373,6 +373,7 @@ "statistics": { "count": 90, "approx_unique": 89, + "null_percentage": 0.0, "avg": -0.045267897325640655, "min": -0.39037741491000255, "max": 0.1368375874054001, @@ -380,7 +381,6 @@ "q50": -0.02593470190059971, "q75": 0.032217117912338666, "std": 0.1107454364608701, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -391,6 +391,7 @@ "statistics": { "count": 90, "approx_unique": 51, + "null_percentage": 0.0, "avg": 0.10811481047424647, "min": -0.21980819819546835, "max": 0.3258610834389272, @@ -398,7 +399,6 @@ "q50": 0.1224927800654327, "q75": 0.1807290254159039, "std": 0.09278593218207873, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -409,6 +409,7 @@ "statistics": { "count": 90, "approx_unique": 91, + "null_percentage": 0.0, "avg": 0.12009873414329322, "min": -0.19853987390199737, "max": 0.31609433253497554, @@ -416,7 +417,6 @@ "q50": 0.12706491197836067, "q75": 0.16441382269084, "std": 0.08682787300981036, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -427,6 +427,7 @@ "statistics": { "count": 90, "approx_unique": 77, + "null_percentage": 0.0, "avg": -0.47729084285094814, "min": -1.0107945710816848, "max": 0.0002537942929515135, @@ -434,7 +435,6 @@ "q50": -0.5199679586889386, "q75": -0.29424806383550745, "std": 0.24990550858449123, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -445,6 +445,7 @@ "statistics": { "count": 90, "approx_unique": 81, + "null_percentage": 
0.0, "avg": 0.02325322437330462, "min": -0.4898249515041333, "max": 0.5596788872673188, @@ -452,7 +453,6 @@ "q50": 0.004578387908287743, "q75": 0.24321247773407953, "std": 0.2322925953111929, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -463,6 +463,7 @@ "statistics": { "count": 90, "approx_unique": 90, + "null_percentage": 0.0, "avg": -0.3929188972604346, "min": -0.950145815170969, "max": -0.02296707125198176, @@ -470,7 +471,6 @@ "q50": -0.3538856846165158, "q75": -0.24813260903173434, "std": 0.20085603094306023, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -481,6 +481,7 @@ "statistics": { "count": 90, "approx_unique": 76, + "null_percentage": 0.0, "avg": -0.2727695078967271, "min": -0.6477196351564702, "max": 0.16378335384162102, @@ -488,7 +489,6 @@ "q50": -0.29568048528186175, "q75": -0.15561341900220704, "std": 0.17578182846904047, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -499,6 +499,7 @@ "statistics": { "count": 90, "approx_unique": 73, + "null_percentage": 0.0, "avg": 0.14716625846389136, "min": -0.17351283839885634, "max": 0.5103964053629805, @@ -506,7 +507,6 @@ "q50": 0.1250140309126661, "q75": 0.2257540196259104, "std": 0.1503827430278282, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -517,6 +517,7 @@ "statistics": { "count": 90, "approx_unique": 89, + "null_percentage": 0.0, "avg": -0.07527183787062067, "min": -0.36892613642037103, "max": 0.45334980309151224, @@ -524,7 +525,6 @@ "q50": -0.07342173104960995, "q75": 0.025990090474862337, "std": 0.15400346506978072, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -535,6 +535,7 @@ "statistics": { "count": 90, "approx_unique": 56, + "null_percentage": 0.0, "avg": 2.0755824268813807, "min": 1.7119138863476728, "max": 2.5039235973072005, @@ -542,7 +543,6 @@ "q50": 2.065960158016336, "q75": 2.1541396565925037, "std": 0.15051918025560856, - "null_percentage": 
0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -553,6 +553,7 @@ "statistics": { "count": 90, "approx_unique": 87, + "null_percentage": 0.0, "avg": 0.00008866011991140269, "min": -0.3411377235696948, "max": 0.5151189655917409, @@ -560,7 +561,6 @@ "q50": -0.007322601493110977, "q75": 0.08251932706319809, "std": 0.15939682436221483, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -571,6 +571,7 @@ "statistics": { "count": 90, "approx_unique": 39, + "null_percentage": 0.0, "avg": -7.441957950972197, "min": -7.769466128030755, "max": -7.214195121871735, @@ -578,7 +579,6 @@ "q50": -7.409897130096453, "q75": -7.360898515991032, "std": 0.13135319135752388, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -589,6 +589,7 @@ "statistics": { "count": 90, "approx_unique": 57, + "null_percentage": 0.0, "avg": -0.09360797138047791, "min": -0.4306131483697273, "max": 0.20935051577885525, @@ -596,7 +597,6 @@ "q50": -0.038237276962355835, "q75": -0.017741479253866304, "std": 0.140422650506953, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -607,6 +607,7 @@ "statistics": { "count": 90, "approx_unique": 53, + "null_percentage": 0.0, "avg": -0.19424066283352154, "min": -0.42962468282768174, "max": 0.2583364066426611, @@ -614,7 +615,6 @@ "q50": -0.1837533599231441, "q75": -0.15961375991220164, "std": 0.10997469664193683, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -625,6 +625,7 @@ "statistics": { "count": 90, "approx_unique": 74, + "null_percentage": 0.0, "avg": 0.16116088565306536, "min": -0.17886279634534932, "max": 0.4510687263935296, @@ -632,7 +633,6 @@ "q50": 0.1405825019304881, "q75": 0.21673936496584525, "std": 0.11008032539154326, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -643,6 +643,7 @@ "statistics": { "count": 90, "approx_unique": 41, + "null_percentage": 0.0, "avg": -0.10600376901922601, "min": -0.3742752151884578, "max": 
0.19878674657853865, @@ -650,7 +651,6 @@ "q50": -0.13384372883065127, "q75": -0.06640058640152215, "std": 0.09117236559678171, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -661,6 +661,7 @@ "statistics": { "count": 90, "approx_unique": 39, + "null_percentage": 0.0, "avg": -0.1030715136653807, "min": -0.42756259522132284, "max": 0.11200664785622663, @@ -668,7 +669,6 @@ "q50": -0.10038447275439095, "q75": -0.05095238102152804, "std": 0.08877577656458578, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -679,6 +679,7 @@ "statistics": { "count": 90, "approx_unique": 65, + "null_percentage": 0.0, "avg": -0.046826073151904346, "min": -0.3058050102447669, "max": 0.2682550750482219, @@ -686,7 +687,6 @@ "q50": -0.052241704717715165, "q75": 0.0006932049804602974, "std": 0.10017992474241678, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -697,6 +697,7 @@ "statistics": { "count": 90, "approx_unique": 21, + "null_percentage": 0.0, "avg": -0.31550103434926957, "min": -0.3685292971362771, "max": 4.514633770863752e-16, @@ -704,7 +705,6 @@ "q50": -0.3160387001271303, "q75": -0.28238497777218724, "std": 0.07773440134663441, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -715,6 +715,7 @@ "statistics": { "count": 90, "approx_unique": 68, + "null_percentage": 0.0, "avg": -0.3481756919057059, "min": -0.5786182414067055, "max": -0.034596193821641634, @@ -722,7 +723,6 @@ "q50": -0.35567043181015146, "q75": -0.294052063651414, "std": 0.09619705685778725, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -733,6 +733,7 @@ "statistics": { "count": 90, "approx_unique": 44, + "null_percentage": 0.0, "avg": -0.09378414469456128, "min": -0.41303397089738386, "max": 0.210618903884933, @@ -740,7 +741,6 @@ "q50": -0.11831335454226283, "q75": -0.04732534181690513, "std": 0.0921549989239766, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } 
@@ -751,6 +751,7 @@ "statistics": { "count": 90, "approx_unique": 62, + "null_percentage": 0.0, "avg": -0.2881041929800976, "min": -0.43589553735072306, "max": -0.0615262923916052, @@ -758,7 +759,6 @@ "q50": -0.29478310312967637, "q75": -0.25626743180480765, "std": 0.0862292982088366, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -769,6 +769,7 @@ "statistics": { "count": 90, "approx_unique": 74, + "null_percentage": 0.0, "avg": -0.026348468239984452, "min": -0.36225394997882004, "max": 0.24685076123720567, @@ -776,7 +777,6 @@ "q50": -0.015733169682750348, "q75": 0.024132284324813083, "std": 0.08411080433688203, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -787,6 +787,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 3.4097999999999993, "min": 3.4098, "max": 3.4098, @@ -794,7 +795,6 @@ "q50": 3.4098, "q75": 3.4098, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -805,6 +805,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.32736999999999933, "min": -0.32737, "max": -0.32737, @@ -812,7 +813,6 @@ "q50": -0.32737, "q75": -0.32737, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -823,6 +823,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.9604400000000006, "min": 0.96044, "max": 0.96044, @@ -830,7 +831,6 @@ "q50": 0.96044, "q75": 0.96044, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -841,6 +841,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -3.7435999999999985, "min": -3.7436, "max": -3.7436, @@ -848,7 +849,6 @@ "q50": -3.7436, "q75": -3.7436, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -859,6 +859,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 
-1.0190999999999992, "min": -1.0191, "max": -1.0191, @@ -866,7 +867,6 @@ "q50": -1.0191, "q75": -1.0191, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -877,6 +877,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -6.020500000000011, "min": -6.0205, "max": -6.0205, @@ -884,7 +885,6 @@ "q50": -6.0205, "q75": -6.0205, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -895,6 +895,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -902,7 +903,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -913,6 +913,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -920,7 +921,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -931,6 +931,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -938,7 +939,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -949,6 +949,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -956,7 +957,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -967,6 +967,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -974,7 +975,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -985,6 +985,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -992,7 +993,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, 
"column_type": "DOUBLE", "type": "numerical" } @@ -1003,6 +1003,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1010,7 +1011,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1021,6 +1021,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1028,7 +1029,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1039,6 +1039,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1046,7 +1047,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1057,6 +1057,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1064,7 +1065,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1075,6 +1075,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1082,7 +1083,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1093,6 +1093,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1100,7 +1101,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1111,6 +1111,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 8.380400000000014e-17, "min": 8.3804e-17, "max": 8.3804e-17, @@ -1118,7 +1119,6 @@ "q50": 8.3804e-17, "q75": 8.3804e-17, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1129,6 +1129,7 @@ "statistics": { "count": 90, 
"approx_unique": 1, + "null_percentage": 0.0, "avg": -4.811599999999999, "min": -4.8116, "max": -4.8116, @@ -1136,7 +1137,6 @@ "q50": -4.8116, "q75": -4.8116, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1147,6 +1147,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.4033000000000013, "min": -1.4033, "max": -1.4033, @@ -1154,7 +1155,6 @@ "q50": -1.4033, "q75": -1.4033, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1165,6 +1165,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.13686000000000026, "min": -0.13685999999999998, "max": -0.13685999999999998, @@ -1172,7 +1173,6 @@ "q50": -0.13685999999999998, "q75": -0.13685999999999998, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1183,6 +1183,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0024718999999999995, "min": 0.0024719, "max": 0.0024719, @@ -1190,7 +1191,6 @@ "q50": 0.0024719, "q75": 0.0024719, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1201,6 +1201,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1208,7 +1209,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1219,6 +1219,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 9.80270000000001e-16, "min": 9.8027e-16, "max": 9.8027e-16, @@ -1226,7 +1227,6 @@ "q50": 9.8027e-16, "q75": 9.8027e-16, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1237,6 +1237,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -55.64199999999992, "min": -55.641999999999996, "max": -55.641999999999996, @@ -1244,7 +1245,6 @@ "q50": -55.641999999999996, 
"q75": -55.641999999999996, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1255,6 +1255,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -16.311999999999976, "min": -16.312, "max": -16.312, @@ -1262,7 +1263,6 @@ "q50": -16.312, "q75": -16.312, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1273,6 +1273,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.2042, "min": -1.2042, "max": -1.2042, @@ -1280,7 +1281,6 @@ "q50": -1.2042, "q75": -1.2042, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1291,6 +1291,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.021675000000000052, "min": 0.021675, "max": 0.021675, @@ -1298,7 +1299,6 @@ "q50": 0.021675, "q75": 0.021675, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1309,6 +1309,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1316,7 +1317,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1327,6 +1327,7 @@ "statistics": { "count": 90, "approx_unique": 2, + "null_percentage": 0.0, "avg": 3.4097933333333326, "min": 3.4097, "max": 3.4098, @@ -1334,7 +1335,6 @@ "q50": 3.4098, "q75": 3.4098, "std": 0.000025084128112200714, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1345,6 +1345,7 @@ "statistics": { "count": 90, "approx_unique": 10, + "null_percentage": 0.0, "avg": -0.3273734444444442, "min": -0.32742, "max": -0.32733, @@ -1352,7 +1353,6 @@ "q50": -0.32737, "q75": -0.32736, "std": 0.000019554654672256435, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1363,6 +1363,7 @@ "statistics": { "count": 90, "approx_unique": 7, + "null_percentage": 0.0, "avg": 
0.9604327777777777, "min": 0.9604, "max": 0.96047, @@ -1370,7 +1371,6 @@ "q50": 0.96044, "q75": 0.96045, "std": 0.000017224739053348342, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1381,6 +1381,7 @@ "statistics": { "count": 90, "approx_unique": 2, + "null_percentage": 0.0, "avg": -3.7436955555555502, "min": -3.7437, "max": -3.7436, @@ -1388,7 +1389,6 @@ "q50": -3.7437, "q75": -3.7437, "std": 0.00002072349321507661, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1399,6 +1399,7 @@ "statistics": { "count": 90, "approx_unique": 2, + "null_percentage": 0.0, "avg": -1.0191022222222215, "min": -1.0192, "max": -1.0191, @@ -1406,7 +1407,6 @@ "q50": -1.0191, "q75": -1.0191, "std": 0.000014823135407909618, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1417,6 +1417,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -6.020500000000011, "min": -6.0205, "max": -6.0205, @@ -1424,7 +1425,6 @@ "q50": -6.0205, "q75": -6.0205, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1435,6 +1435,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1442,7 +1443,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1453,6 +1453,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1460,7 +1461,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1471,6 +1471,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": -0.0, "max": -0.0, @@ -1478,7 +1479,6 @@ "q50": -0.0, "q75": -0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1489,6 +1489,7 @@ "statistics": { "count": 90, "approx_unique": 
1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1496,7 +1497,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1507,6 +1507,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1514,7 +1515,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1525,6 +1525,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1532,7 +1533,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1543,6 +1543,7 @@ "statistics": { "count": 90, "approx_unique": 21, + "null_percentage": 0.0, "avg": 0.11991266666666668, "min": 0.09863999999999999, "max": 0.14795999999999998, @@ -1550,7 +1551,6 @@ "q50": 0.12105999999999999, "q75": 0.12554, "std": 0.009094154166276267, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1561,6 +1561,7 @@ "statistics": { "count": 90, "approx_unique": 13, + "null_percentage": 0.0, "avg": -6.548282222222224, "min": -6.5618, "max": -6.5304, @@ -1568,7 +1569,6 @@ "q50": -6.5483, "q75": -6.5438, "std": 0.007198089287322413, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1579,6 +1579,7 @@ "statistics": { "count": 90, "approx_unique": 17, + "null_percentage": 0.0, "avg": -2.815258888888891, "min": -2.8404, "max": -2.7888, @@ -1586,7 +1587,6 @@ "q50": -2.8135, "q75": -2.8089999999999997, "std": 0.00938972537254334, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1597,6 +1597,7 @@ "statistics": { "count": 90, "approx_unique": 20, + "null_percentage": 0.0, "avg": -0.8301134444444447, "min": -0.84335, "max": -0.8113199999999999, @@ -1604,7 +1605,6 @@ "q50": -0.82962, "q75": -0.82657, "std": 0.006226257764926416, - "null_percentage": 0.0, 
"column_type": "DOUBLE", "type": "numerical" } @@ -1615,6 +1615,7 @@ "statistics": { "count": 90, "approx_unique": 16, + "null_percentage": 0.0, "avg": 0.07021977777777792, "min": 0.056427, "max": 0.083877, @@ -1622,7 +1623,6 @@ "q50": 0.071677, "q75": 0.074727, "std": 0.0056365047695782315, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1633,6 +1633,7 @@ "statistics": { "count": 90, "approx_unique": 21, + "null_percentage": 0.0, "avg": -0.1953428888888889, "min": -0.21045999999999998, "max": -0.17995, @@ -1640,7 +1641,6 @@ "q50": -0.19444499999999998, "q75": -0.19063, "std": 0.005854349625541456, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1651,6 +1651,7 @@ "statistics": { "count": 90, "approx_unique": 4, + "null_percentage": 0.0, "avg": 0.12078599999999996, "min": 0.10985, "max": 0.12105999999999999, @@ -1658,7 +1659,6 @@ "q50": 0.12105999999999999, "q75": 0.12105999999999999, "std": 0.0015338991953753134, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1669,6 +1669,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -6.548299999999999, "min": -6.5483, "max": -6.5483, @@ -1676,7 +1677,6 @@ "q50": -6.5483, "q75": -6.5483, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1687,6 +1687,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -2.815699999999998, "min": -2.8157, "max": -2.8157, @@ -1694,7 +1695,6 @@ "q50": -2.8157, "q75": -2.8157, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1705,6 +1705,7 @@ "statistics": { "count": 90, "approx_unique": 5, + "null_percentage": 0.0, "avg": -0.8298086666666671, "min": -0.83267, "max": -0.82657, @@ -1712,7 +1713,6 @@ "q50": -0.8281, "q75": -0.8281, "std": 0.002282677414742483, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1723,6 +1723,7 @@ "statistics": 
{ "count": 90, "approx_unique": 2, + "null_percentage": 0.0, "avg": 0.07018588888888903, "min": 0.07015199999999999, "max": 0.073202, @@ -1730,7 +1731,6 @@ "q50": 0.07015199999999999, "q75": 0.07015199999999999, "std": 0.00032149822878378714, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1741,6 +1741,7 @@ "statistics": { "count": 90, "approx_unique": 5, + "null_percentage": 0.0, "avg": -0.19520744444444438, "min": -0.19826, "max": -0.19216, @@ -1748,7 +1749,6 @@ "q50": -0.19368, "q75": -0.19368, "std": 0.0022294185068984654, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1759,6 +1759,7 @@ "statistics": { "count": 90, "approx_unique": 4, + "null_percentage": 0.0, "avg": 0.7699038888888889, "min": 0.76989, "max": 0.7699199999999999, @@ -1766,7 +1767,6 @@ "q50": 0.7699, "q75": 0.76991, "std": 6.982231774226693e-6, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1777,6 +1777,7 @@ "statistics": { "count": 90, "approx_unique": 6, + "null_percentage": 0.0, "avg": 0.4100198888888888, "min": 0.40998999999999997, "max": 0.41003999999999996, @@ -1784,7 +1785,6 @@ "q50": 0.41002, "q75": 0.41003, "std": 0.000010546253868626157, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1795,6 +1795,7 @@ "statistics": { "count": 90, "approx_unique": 48, + "null_percentage": 0.0, "avg": 0.08278952222222224, "min": 0.082732, "max": 0.082838, @@ -1802,7 +1803,6 @@ "q50": 0.08279, "q75": 0.082802, "std": 0.000018365960072599283, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1813,6 +1813,7 @@ "statistics": { "count": 90, "approx_unique": 2, + "null_percentage": 0.0, "avg": -1.409378888888891, "min": -1.4094, "max": -1.4093, @@ -1820,7 +1821,6 @@ "q50": -1.4094, "q75": -1.4094, "std": 0.00004103833353067418, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1831,6 +1831,7 @@ "statistics": { "count": 90, "approx_unique": 
13, + "null_percentage": 0.0, "avg": 0.7859538888888887, "min": 0.78589, "max": 0.7860699999999999, @@ -1838,7 +1839,6 @@ "q50": 0.78595, "q75": 0.78598, "std": 0.00003175377335603696, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1849,6 +1849,7 @@ "statistics": { "count": 90, "approx_unique": 12, + "null_percentage": 0.0, "avg": -0.36820477777777777, "min": -0.36828, "max": -0.36815, @@ -1856,7 +1857,6 @@ "q50": -0.36821, "q75": -0.36818, "std": 0.000025625995306625105, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1867,6 +1867,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1874,7 +1875,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1885,6 +1885,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1892,7 +1893,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1903,6 +1903,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1910,7 +1911,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1921,6 +1921,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1928,7 +1929,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1939,6 +1939,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -1946,7 +1947,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1957,6 +1957,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, 
"avg": 0.0, "min": 0.0, "max": 0.0, @@ -1964,7 +1965,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1975,6 +1975,7 @@ "statistics": { "count": 90, "approx_unique": 86, + "null_percentage": 0.0, "avg": -22.28985555555556, "min": -24.633000000000003, "max": -20.959, @@ -1982,7 +1983,6 @@ "q50": -22.195, "q75": -21.851999999999997, "std": 0.6202698680723187, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -1993,6 +1993,7 @@ "statistics": { "count": 90, "approx_unique": 100, + "null_percentage": 0.0, "avg": -11.364411111111112, "min": -12.285, "max": -10.633, @@ -2000,7 +2001,6 @@ "q50": -11.3455, "q75": -11.142000000000001, "std": 0.3142770514111636, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2011,6 +2011,7 @@ "statistics": { "count": 90, "approx_unique": 92, + "null_percentage": 0.0, "avg": -18.554833333333338, "min": -18.916, "max": -18.05, @@ -2018,7 +2019,6 @@ "q50": -18.5795, "q75": -18.438, "std": 0.18080868590550767, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2029,6 +2029,7 @@ "statistics": { "count": 90, "approx_unique": 84, + "null_percentage": 0.0, "avg": -3.5600688888888885, "min": -3.9265, "max": -3.241, @@ -2036,7 +2037,6 @@ "q50": -3.5422000000000002, "q75": -3.4936, "std": 0.11474722061645332, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2047,6 +2047,7 @@ "statistics": { "count": 90, "approx_unique": 81, + "null_percentage": 0.0, "avg": 5.840498888888888, "min": 5.6508, "max": 6.0109, @@ -2054,7 +2055,6 @@ "q50": 5.8383, "q75": 5.8821, "std": 0.07474927303309553, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2065,6 +2065,7 @@ "statistics": { "count": 90, "approx_unique": 85, + "null_percentage": 0.0, "avg": -1.9630522222222215, "min": -2.167, "max": -1.7896, @@ -2072,7 +2073,6 @@ "q50": -1.95795, "q75": -1.9001, "std": 
0.08460407357018439, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2083,6 +2083,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.7699000000000005, "min": 0.7699, "max": 0.7699, @@ -2090,7 +2091,6 @@ "q50": 0.7699, "q75": 0.7699, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2101,6 +2101,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.4100399999999998, "min": 0.41003999999999996, "max": 0.41003999999999996, @@ -2108,7 +2109,6 @@ "q50": 0.41003999999999996, "q75": 0.41003999999999996, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2119,6 +2119,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.08278199999999993, "min": 0.08278200000000001, "max": 0.08278200000000001, @@ -2126,7 +2127,6 @@ "q50": 0.08278200000000001, "q75": 0.08278200000000001, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2137,6 +2137,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.4094000000000022, "min": -1.4094, "max": -1.4094, @@ -2144,7 +2145,6 @@ "q50": -1.4094, "q75": -1.4094, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2155,6 +2155,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.7860000000000009, "min": 0.7859999999999999, "max": 0.7859999999999999, @@ -2162,7 +2163,6 @@ "q50": 0.7859999999999999, "q75": 0.7859999999999999, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2173,6 +2173,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.3681300000000005, "min": -0.36813, "max": -0.36813, @@ -2180,7 +2181,6 @@ "q50": -0.36813, "q75": -0.36813, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": 
"numerical" } @@ -2191,6 +2191,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -2198,7 +2199,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2209,6 +2209,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -2216,7 +2217,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2227,6 +2227,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -2234,7 +2235,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2245,6 +2245,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -2252,7 +2253,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2263,6 +2263,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -2270,7 +2271,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2281,6 +2281,7 @@ "statistics": { "count": 90, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -2288,7 +2289,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2299,6 +2299,7 @@ "statistics": { "count": 90, "approx_unique": 2, + "null_percentage": 0.0, "avg": 48.053000000000004, "min": 48.00899999999999, "max": 48.068999999999996, @@ -2306,7 +2307,6 @@ "q50": 48.068999999999996, "q75": 48.068999999999996, "std": 0.026681643734216377, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2317,6 +2317,7 @@ "statistics": { "count": 
90, "approx_unique": 2, + "null_percentage": 0.0, "avg": 48.02033333333333, "min": 48.00899999999999, "max": 48.068999999999996, @@ -2324,7 +2325,6 @@ "q50": 48.00899999999999, "q75": 48.00899999999999, "std": 0.023616790827478015, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2335,6 +2335,7 @@ "statistics": { "count": 90, "approx_unique": 7, + "null_percentage": 0.0, "avg": 0.906720666666667, "min": 0.78776, "max": 1.11, @@ -2342,7 +2343,6 @@ "q50": 0.89518, "q75": 0.93099, "std": 0.05030257533112812, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2353,6 +2353,7 @@ "statistics": { "count": 90, "approx_unique": 6, + "null_percentage": 0.0, "avg": 47.85533333333337, "min": 47.818000000000005, "max": 47.895, @@ -2360,7 +2361,6 @@ "q50": 47.849, "q75": 47.864, "std": 0.022072403839090607, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2371,6 +2371,7 @@ "statistics": { "count": 90, "approx_unique": 5, + "null_percentage": 0.0, "avg": 47.898644444444464, "min": 47.864, "max": 47.925, @@ -2378,7 +2379,6 @@ "q50": 47.895, "q75": 47.925, "std": 0.021592919725505744, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2389,6 +2389,7 @@ "statistics": { "count": 90, "approx_unique": 3, + "null_percentage": 0.0, "avg": 47.801333333333375, "min": 47.788000000000004, "max": 47.818000000000005, @@ -2396,7 +2397,6 @@ "q50": 47.803000000000004, "q75": 47.818000000000005, "std": 0.013196867180159553, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2407,6 +2407,7 @@ "statistics": { "count": 90, "approx_unique": 4, + "null_percentage": 0.0, "avg": 47.830166666666635, "min": 47.803000000000004, "max": 47.849, @@ -2414,7 +2415,6 @@ "q50": 47.833999999999996, "q75": 47.833999999999996, "std": 0.014095381345705139, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2425,6 +2425,7 @@ "statistics": { "count": 90, 
"approx_unique": 4, + "null_percentage": 0.0, "avg": 47.95866666666665, "min": 47.94, "max": 47.986000000000004, @@ -2432,7 +2433,6 @@ "q50": 47.955, "q75": 47.971000000000004, "std": 0.014229262458485287, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2443,6 +2443,7 @@ "statistics": { "count": 90, "approx_unique": 3, + "null_percentage": 0.0, "avg": 47.96328888888889, "min": 47.94, "max": 47.971000000000004, @@ -2450,7 +2451,6 @@ "q50": 47.971000000000004, "q75": 47.971000000000004, "std": 0.010811193163940106, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2460,13 +2460,14 @@ "validation": { "name": "features.validation", "path": "pipeline/feature_sets/features.validation.parquet", - "column_profile": { + "columns": { "f_x": { "name": "f_x", "role": "target", "statistics": { "count": 30, "approx_unique": 32, + "null_percentage": 0.0, "avg": -10.5642, "min": -10.932, "max": -10.06, @@ -2474,7 +2475,6 @@ "q50": -10.575500000000002, "q75": -10.447000000000001, "std": 0.23268054169345292, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -2485,6 +2485,7 @@ "statistics": { "count": 30, "approx_unique": 26, + "null_percentage": 0.0, "avg": 6.4535, "min": 5.93, "max": 6.7792, @@ -2492,7 +2493,6 @@ "q50": 6.4831, "q75": 6.56, "std": 0.21875581253361343, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -2503,6 +2503,7 @@ "statistics": { "count": 30, "approx_unique": 27, + "null_percentage": 0.0, "avg": -7.461039999999999, "min": -7.66, "max": -7.2267, @@ -2510,7 +2511,6 @@ "q50": -7.48, "q75": -7.3936, "std": 0.11093529832200898, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -2521,6 +2521,7 @@ "statistics": { "count": 30, "approx_unique": 24, + "null_percentage": 0.0, "avg": 0.19332420441436537, "min": -0.06930364540062461, "max": 1.3037535132510685, @@ -2528,7 +2529,6 @@ "q50": 0.14291324555132306, "q75": 0.24721962075109202, "std": 
0.22649552097283404, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2539,6 +2539,7 @@ "statistics": { "count": 30, "approx_unique": 18, + "null_percentage": 0.0, "avg": -0.24948452825416273, "min": -0.4664868527180687, "max": -0.008751983051116944, @@ -2546,7 +2547,6 @@ "q50": -0.2487903780085609, "q75": -0.1959040018260866, "std": 0.09273869760797071, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2557,6 +2557,7 @@ "statistics": { "count": 30, "approx_unique": 33, + "null_percentage": 0.0, "avg": -0.03220946094021907, "min": -0.5107104892437428, "max": 0.33288120717118047, @@ -2564,7 +2565,6 @@ "q50": 0.00014504147973003895, "q75": 0.1316464123291316, "std": 0.23310081717400694, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2575,6 +2575,7 @@ "statistics": { "count": 30, "approx_unique": 19, + "null_percentage": 0.0, "avg": 0.08732083783309112, "min": -0.10287030322377438, "max": 0.17187940863731502, @@ -2582,7 +2583,6 @@ "q50": 0.08029617135028516, "q75": 0.09118402694709957, "std": 0.05114212039632681, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2593,6 +2593,7 @@ "statistics": { "count": 30, "approx_unique": 27, + "null_percentage": 0.0, "avg": 0.07948716255068788, "min": -0.10932380846978837, "max": 0.26991479552895076, @@ -2600,7 +2601,6 @@ "q50": 0.07208644575746953, "q75": 0.10782668921811735, "std": 0.08845071646577753, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2611,6 +2611,7 @@ "statistics": { "count": 30, "approx_unique": 24, + "null_percentage": 0.0, "avg": -0.18233280354848666, "min": -0.4827235133296347, "max": 0.1358070895207469, @@ -2618,7 +2619,6 @@ "q50": -0.21407185345596058, "q75": -0.02388809656338143, "std": 0.19310740054288641, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2629,6 +2629,7 @@ "statistics": { "count": 30, "approx_unique": 28, + 
"null_percentage": 0.0, "avg": 1.0641570740696455, "min": 0.777644136584092, "max": 1.2448605057965791, @@ -2636,7 +2637,6 @@ "q50": 1.0680944411361768, "q75": 1.1700734819881216, "std": 0.11858626770232168, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2647,6 +2647,7 @@ "statistics": { "count": 30, "approx_unique": 27, + "null_percentage": 0.0, "avg": -0.1551730746013871, "min": -0.473075626179459, "max": 0.10310515928743302, @@ -2654,7 +2655,6 @@ "q50": -0.14625013307001683, "q75": -0.03407518465416763, "std": 0.16971366251454276, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2665,6 +2665,7 @@ "statistics": { "count": 30, "approx_unique": 4, + "null_percentage": 0.0, "avg": 0.15834076564615415, "min": 0.033101174425396875, "max": 1.8107382662249447, @@ -2672,7 +2673,6 @@ "q50": 0.033101174425396875, "q75": 0.033101174425396875, "std": 0.40069341721501156, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2683,6 +2683,7 @@ "statistics": { "count": 30, "approx_unique": 27, + "null_percentage": 0.0, "avg": 0.10901962726390516, "min": -0.08377860442841095, "max": 0.2605343481036355, @@ -2690,7 +2691,6 @@ "q50": 0.10697378018021675, "q75": 0.16899039675248545, "std": 0.08159971392741276, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2701,6 +2701,7 @@ "statistics": { "count": 30, "approx_unique": 21, + "null_percentage": 0.0, "avg": -1.0453845647173199, "min": -1.3460824965880067, "max": -0.6735719458851226, @@ -2708,7 +2709,6 @@ "q50": -1.0286035881182158, "q75": -0.8803244767013989, "std": 0.20481309067880235, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2719,6 +2719,7 @@ "statistics": { "count": 30, "approx_unique": 15, + "null_percentage": 0.0, "avg": -0.5661851957649622, "min": -0.8187028842777508, "max": -0.22545488552981147, @@ -2726,7 +2727,6 @@ "q50": -0.5648414652788843, "q75": -0.5257859905450427, "std": 
0.1487310558710614, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2737,6 +2737,7 @@ "statistics": { "count": 30, "approx_unique": 21, + "null_percentage": 0.0, "avg": -0.41623350214906585, "min": -0.7696245032865708, "max": -0.014361416902144361, @@ -2744,7 +2745,6 @@ "q50": -0.38996440289319434, "q75": -0.34533151657601335, "std": 0.16155216436175804, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2755,6 +2755,7 @@ "statistics": { "count": 30, "approx_unique": 18, + "null_percentage": 0.0, "avg": -0.2978326158779928, "min": -0.5050112572399559, "max": 0.028664635281663388, @@ -2762,7 +2763,6 @@ "q50": -0.22089972352325732, "q75": -0.16620112071181375, "std": 0.16352894948978416, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2773,6 +2773,7 @@ "statistics": { "count": 30, "approx_unique": 23, + "null_percentage": 0.0, "avg": 0.0409853052121245, "min": -0.12182810138649722, "max": 0.5423670291968665, @@ -2780,7 +2781,6 @@ "q50": 0.0407825927700286, "q75": 0.09142159288139813, "std": 0.12110415920479999, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2791,6 +2791,7 @@ "statistics": { "count": 30, "approx_unique": 25, + "null_percentage": 0.0, "avg": -0.2055093204130946, "min": -0.4627830029525615, "max": 0.35934682649676397, @@ -2798,7 +2799,6 @@ "q50": -0.22179819211792431, "q75": -0.09406641645971225, "std": 0.17840704413673253, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2809,6 +2809,7 @@ "statistics": { "count": 30, "approx_unique": 19, + "null_percentage": 0.0, "avg": 2.142361172203464, "min": 1.7685768462687088, "max": 2.570489509920782, @@ -2816,7 +2817,6 @@ "q50": 2.190929511201602, "q75": 2.3007878576168435, "std": 0.22293632391972465, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2827,6 +2827,7 @@ "statistics": { "count": 30, "approx_unique": 23, + "null_percentage": 0.0, 
"avg": -0.06373529289250557, "min": -0.4231871228845855, "max": 0.27229675050865815, @@ -2834,7 +2835,6 @@ "q50": -0.05703426232304057, "q75": 0.09012103816953052, "std": 0.20660065875996825, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2845,6 +2845,7 @@ "statistics": { "count": 30, "approx_unique": 18, + "null_percentage": 0.0, "avg": -7.643093988399562, "min": -8.050995225861147, "max": -7.409897130096452, @@ -2852,7 +2853,6 @@ "q50": -7.6641613370213175, "q75": -7.473070550207218, "std": 0.15778718235301686, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2863,6 +2863,7 @@ "statistics": { "count": 30, "approx_unique": 27, + "null_percentage": 0.0, "avg": -0.7530513784013607, "min": -1.3387934288398122, "max": -0.20416046725912887, @@ -2870,7 +2871,6 @@ "q50": -0.7563854503126777, "q75": -0.5573693981246907, "std": 0.31863069066844263, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2881,6 +2881,7 @@ "statistics": { "count": 30, "approx_unique": 3, + "null_percentage": 0.0, "avg": -0.13134804308484582, "min": -0.15961375991220164, "max": -0.0024689568783826856, @@ -2888,7 +2889,6 @@ "q50": -0.15961375991220164, "q75": -0.07326042218884443, "std": 0.04563013152071064, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2899,6 +2899,7 @@ "statistics": { "count": 30, "approx_unique": 17, + "null_percentage": 0.0, "avg": 0.1496585775117086, "min": 0.058092614857786516, "max": 0.3560530253389044, @@ -2906,7 +2907,6 @@ "q50": 0.12479216629262262, "q75": 0.19222745792740756, "std": 0.07679729757155859, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2917,6 +2917,7 @@ "statistics": { "count": 30, "approx_unique": 7, + "null_percentage": 0.0, "avg": -0.15198687814246747, "min": -0.35791488516837033, "max": 0.07389933055073145, @@ -2924,7 +2925,6 @@ "q50": -0.13404160537759308, "q75": -0.13404160537759308, "std": 
0.0864736053500799, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2935,6 +2935,7 @@ "statistics": { "count": 30, "approx_unique": 5, + "null_percentage": 0.0, "avg": -0.11632438554282588, "min": -0.20960742932896942, "max": 0.010684053390404269, @@ -2942,7 +2943,6 @@ "q50": -0.15991801391544605, "q75": -0.10038447275439095, "std": 0.06247110544464339, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2953,6 +2953,7 @@ "statistics": { "count": 30, "approx_unique": 26, + "null_percentage": 0.0, "avg": -0.0984510164977664, "min": -0.34489103667245363, "max": 0.2165068431882982, @@ -2960,7 +2961,6 @@ "q50": -0.11103071738897047, "q75": -0.013643570961072231, "std": 0.13788035215100206, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2971,6 +2971,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.36852929713627697, "min": -0.3685292971362768, "max": -0.3685292971362768, @@ -2978,7 +2979,6 @@ "q50": -0.3685292971362768, "q75": -0.3685292971362768, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -2989,6 +2989,7 @@ "statistics": { "count": 30, "approx_unique": 13, + "null_percentage": 0.0, "avg": -0.4439899096092964, "min": -0.5352447770470662, "max": -0.33798482353938075, @@ -2996,7 +2997,6 @@ "q50": -0.44312172002842065, "q75": -0.4183878615399482, "std": 0.05295072766278327, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3007,6 +3007,7 @@ "statistics": { "count": 30, "approx_unique": 3, + "null_percentage": 0.0, "avg": -0.11044296854216397, "min": -0.13463080278231188, "max": 0.01896951033776234, @@ -3014,7 +3015,6 @@ "q50": -0.12906911404610488, "q75": -0.12906911404610488, "std": 0.0516751566527303, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3025,6 +3025,7 @@ "statistics": { "count": 30, "approx_unique": 18, + "null_percentage": 0.0, "avg": 
-0.3792580928351808, "min": -0.5013261748674661, "max": -0.2537691424216535, @@ -3032,7 +3033,6 @@ "q50": -0.3981624767174681, "q75": -0.36719544571537716, "std": 0.060455838588677244, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3043,6 +3043,7 @@ "statistics": { "count": 30, "approx_unique": 20, + "null_percentage": 0.0, "avg": 0.022939542302922975, "min": -0.0755313506940955, "max": 0.1304724724615783, @@ -3050,7 +3051,6 @@ "q50": 0.04406501132859475, "q75": 0.06399773833237647, "std": 0.05775911514165324, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3061,6 +3061,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 3.409800000000001, "min": 3.4098, "max": 3.4098, @@ -3068,7 +3069,6 @@ "q50": 3.4098, "q75": 3.4098, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3079,6 +3079,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.3273700000000001, "min": -0.32737, "max": -0.32737, @@ -3086,7 +3087,6 @@ "q50": -0.32737, "q75": -0.32737, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3097,6 +3097,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.9604399999999994, "min": 0.96044, "max": 0.96044, @@ -3104,7 +3105,6 @@ "q50": 0.96044, "q75": 0.96044, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3115,6 +3115,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -3.7436000000000003, "min": -3.7436, "max": -3.7436, @@ -3122,7 +3123,6 @@ "q50": -3.7436, "q75": -3.7436, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3133,6 +3133,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.0191000000000008, "min": -1.0191, "max": -1.0191, @@ -3140,7 +3141,6 @@ "q50": -1.0191, 
"q75": -1.0191, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3151,6 +3151,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -6.020499999999999, "min": -6.0205, "max": -6.0205, @@ -3158,7 +3159,6 @@ "q50": -6.0205, "q75": -6.0205, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3169,6 +3169,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3176,7 +3177,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3187,6 +3187,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3194,7 +3195,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3205,6 +3205,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3212,7 +3213,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3223,6 +3223,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3230,7 +3231,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3241,6 +3241,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3248,7 +3249,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3259,6 +3259,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3266,7 +3267,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3277,6 +3277,7 
@@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3284,7 +3285,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3295,6 +3295,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3302,7 +3303,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3313,6 +3313,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3320,7 +3321,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3331,6 +3331,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3338,7 +3339,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3349,6 +3349,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3356,7 +3357,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3367,6 +3367,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3374,7 +3375,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3385,6 +3385,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 8.380399999999998e-17, "min": 8.3804e-17, "max": 8.3804e-17, @@ -3392,7 +3393,6 @@ "q50": 8.3804e-17, "q75": 8.3804e-17, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3403,6 +3403,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -4.811599999999999, 
"min": -4.8116, "max": -4.8116, @@ -3410,7 +3411,6 @@ "q50": -4.8116, "q75": -4.8116, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3421,6 +3421,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.4033000000000009, "min": -1.4033, "max": -1.4033, @@ -3428,7 +3429,6 @@ "q50": -1.4033, "q75": -1.4033, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3439,6 +3439,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.13685999999999998, "min": -0.13685999999999998, "max": -0.13685999999999998, @@ -3446,7 +3447,6 @@ "q50": -0.13685999999999998, "q75": -0.13685999999999998, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3457,6 +3457,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0024718999999999995, "min": 0.0024719, "max": 0.0024719, @@ -3464,7 +3465,6 @@ "q50": 0.0024719, "q75": 0.0024719, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3475,6 +3475,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3482,7 +3483,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3493,6 +3493,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 9.802700000000004e-16, "min": 9.8027e-16, "max": 9.8027e-16, @@ -3500,7 +3501,6 @@ "q50": 9.8027e-16, "q75": 9.8027e-16, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3511,6 +3511,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -55.64200000000003, "min": -55.641999999999996, "max": -55.641999999999996, @@ -3518,7 +3519,6 @@ "q50": -55.641999999999996, "q75": -55.641999999999996, "std": 0.0, - "null_percentage": 0.0, 
"column_type": "DOUBLE", "type": "numerical" } @@ -3529,6 +3529,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -16.31200000000001, "min": -16.312, "max": -16.312, @@ -3536,7 +3537,6 @@ "q50": -16.312, "q75": -16.312, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3547,6 +3547,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.2042, "min": -1.2042, "max": -1.2042, @@ -3554,7 +3555,6 @@ "q50": -1.2042, "q75": -1.2042, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3565,6 +3565,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.021675, "min": 0.021675, "max": 0.021675, @@ -3572,7 +3573,6 @@ "q50": 0.021675, "q75": 0.021675, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3583,6 +3583,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3590,7 +3591,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3601,6 +3601,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 3.4097866666666676, "min": 3.4097, "max": 3.4098, @@ -3608,7 +3609,6 @@ "q50": 3.4098, "q75": 3.4098, "std": 0.000034574590364305684, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3619,6 +3619,7 @@ "statistics": { "count": 30, "approx_unique": 7, + "null_percentage": 0.0, "avg": -0.3273783333333332, "min": -0.32741, "max": -0.32734, @@ -3626,7 +3627,6 @@ "q50": -0.32737, "q75": -0.32736, "std": 0.000020356026530054445, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3637,6 +3637,7 @@ "statistics": { "count": 30, "approx_unique": 5, + "null_percentage": 0.0, "avg": 0.9604276666666665, "min": 0.9604, "max": 0.96045, @@ -3644,7 +3645,6 @@ "q50": 
0.96042, "q75": 0.96044, "std": 0.000015465943304497223, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3655,6 +3655,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -3.743700000000001, "min": -3.7437, "max": -3.7437, @@ -3662,7 +3663,6 @@ "q50": -3.7437, "q75": -3.7437, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3673,6 +3673,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": -1.019103333333334, "min": -1.0192, "max": -1.0191, @@ -3680,7 +3681,6 @@ "q50": -1.0191, "q75": -1.0191, "std": 0.0000182574185835436, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3691,6 +3691,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -6.020499999999999, "min": -6.0205, "max": -6.0205, @@ -3698,7 +3699,6 @@ "q50": -6.0205, "q75": -6.0205, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3709,6 +3709,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3716,7 +3717,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3727,6 +3727,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3734,7 +3735,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3745,6 +3745,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": -0.0, "max": -0.0, @@ -3752,7 +3753,6 @@ "q50": -0.0, "q75": -0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3763,6 +3763,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3770,7 +3771,6 @@ "q50": 0.0, 
"q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3781,6 +3781,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3788,7 +3789,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3799,6 +3799,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -3806,7 +3807,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3817,6 +3817,7 @@ "statistics": { "count": 30, "approx_unique": 16, + "null_percentage": 0.0, "avg": 0.123375, "min": 0.09863999999999999, "max": 0.13899, @@ -3824,7 +3825,6 @@ "q50": 0.12442, "q75": 0.13003, "std": 0.009410180015867632, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3835,6 +3835,7 @@ "statistics": { "count": 30, "approx_unique": 11, + "null_percentage": 0.0, "avg": -6.548706666666667, "min": -6.5685, "max": -6.5371, @@ -3842,7 +3843,6 @@ "q50": -6.5483, "q75": -6.5416, "std": 0.007517975393880323, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3853,6 +3853,7 @@ "statistics": { "count": 30, "approx_unique": 11, + "null_percentage": 0.0, "avg": -2.8156400000000006, "min": -2.8292, "max": -2.8023, @@ -3860,7 +3861,6 @@ "q50": -2.8157, "q75": -2.8089999999999997, "std": 0.0075047846806852985, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3871,6 +3871,7 @@ "statistics": { "count": 30, "approx_unique": 16, + "null_percentage": 0.0, "avg": -0.8293686666666668, "min": -0.8418200000000001, "max": -0.81742, @@ -3878,7 +3879,6 @@ "q50": -0.8281, "q75": -0.82505, "std": 0.005721133679150738, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3889,6 +3889,7 @@ "statistics": { "count": 30, "approx_unique": 13, + 
"null_percentage": 0.0, "avg": 0.06994866666666667, "min": 0.059476999999999995, "max": 0.079302, @@ -3896,7 +3897,6 @@ "q50": 0.068627, "q75": 0.073202, "std": 0.005077370065458026, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3907,6 +3907,7 @@ "statistics": { "count": 30, "approx_unique": 15, + "null_percentage": 0.0, "avg": -0.19703766666666664, "min": -0.20893, "max": -0.18758, @@ -3914,7 +3915,6 @@ "q50": -0.19673, "q75": -0.19216, "std": 0.005423127792839781, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3925,6 +3925,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.11881999999999995, "min": 0.11882000000000001, "max": 0.11882000000000001, @@ -3932,7 +3933,6 @@ "q50": 0.11882000000000001, "q75": 0.11882000000000001, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3943,6 +3943,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -6.548300000000003, "min": -6.5483, "max": -6.5483, @@ -3950,7 +3951,6 @@ "q50": -6.5483, "q75": -6.5483, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3961,6 +3961,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -2.8157000000000014, "min": -2.8157, "max": -2.8157, @@ -3968,7 +3969,6 @@ "q50": -2.8157, "q75": -2.8157, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3979,6 +3979,7 @@ "statistics": { "count": 30, "approx_unique": 5, + "null_percentage": 0.0, "avg": -0.8296739999999997, "min": -0.83267, "max": -0.82657, @@ -3986,7 +3987,6 @@ "q50": -0.8281, "q75": -0.8281, "std": 0.0020978159414348323, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -3997,6 +3997,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.070152, "min": 0.07015199999999999, "max": 0.07015199999999999, @@ 
-4004,7 +4005,6 @@ "q50": 0.07015199999999999, "q75": 0.07015199999999999, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4015,6 +4015,7 @@ "statistics": { "count": 30, "approx_unique": 5, + "null_percentage": 0.0, "avg": -0.19673366666666656, "min": -0.19826, "max": -0.19216, @@ -4022,7 +4023,6 @@ "q50": -0.19826, "q75": -0.19521, "std": 0.0020824959880465267, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4033,6 +4033,7 @@ "statistics": { "count": 30, "approx_unique": 5, + "null_percentage": 0.0, "avg": 0.7699036666666664, "min": 0.76988, "max": 0.7699199999999999, @@ -4040,7 +4041,6 @@ "q50": 0.7699, "q75": 0.76991, "std": 8.899179866617674e-6, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4051,6 +4051,7 @@ "statistics": { "count": 30, "approx_unique": 7, + "null_percentage": 0.0, "avg": 0.4100196666666667, "min": 0.40998999999999997, "max": 0.41005, @@ -4058,7 +4059,6 @@ "q50": 0.41002, "q75": 0.41003, "std": 0.000013514572807192662, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4069,6 +4069,7 @@ "statistics": { "count": 30, "approx_unique": 28, + "null_percentage": 0.0, "avg": 0.08279593333333332, "min": 0.08276900000000001, "max": 0.082833, @@ -4076,7 +4077,6 @@ "q50": 0.082794, "q75": 0.08281000000000001, "std": 0.00001697042728686539, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4087,6 +4087,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": -1.4093799999999999, "min": -1.4094, "max": -1.4093, @@ -4094,7 +4095,6 @@ "q50": -1.4094, "q75": -1.4094, "std": 0.00004068381021720471, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4105,6 +4105,7 @@ "statistics": { "count": 30, "approx_unique": 10, + "null_percentage": 0.0, "avg": 0.7859436666666667, "min": 0.78588, "max": 0.78601, @@ -4112,7 +4113,6 @@ "q50": 0.78594, "q75": 0.78596, 
"std": 0.000030112814700618006, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4123,6 +4123,7 @@ "statistics": { "count": 30, "approx_unique": 11, + "null_percentage": 0.0, "avg": -0.3682093333333333, "min": -0.36826, "max": -0.36815, @@ -4130,7 +4131,6 @@ "q50": -0.3682, "q75": -0.36819, "std": 0.000029117281671781178, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4141,6 +4141,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4148,7 +4149,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4159,6 +4159,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4166,7 +4167,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4177,6 +4177,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4184,7 +4185,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4195,6 +4195,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4202,7 +4203,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4213,6 +4213,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4220,7 +4221,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4231,6 +4231,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4238,7 +4239,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": 
"numerical" } @@ -4249,6 +4249,7 @@ "statistics": { "count": 30, "approx_unique": 28, + "null_percentage": 0.0, "avg": -22.26903333333333, "min": -23.305, "max": -21.339000000000002, @@ -4256,7 +4257,6 @@ "q50": -22.258499999999998, "q75": -21.991, "std": 0.5447219780156962, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4267,6 +4267,7 @@ "statistics": { "count": 30, "approx_unique": 30, + "null_percentage": 0.0, "avg": -11.434133333333332, "min": -12.095, "max": -10.603, @@ -4274,7 +4275,6 @@ "q50": -11.4335, "q75": -11.258, "std": 0.32464003761549265, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4285,6 +4285,7 @@ "statistics": { "count": 30, "approx_unique": 24, + "null_percentage": 0.0, "avg": -18.563200000000002, "min": -19.077, "max": -18.233, @@ -4292,7 +4293,6 @@ "q50": -18.512999999999998, "q75": -18.447, "std": 0.1988744882305561, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4303,6 +4303,7 @@ "statistics": { "count": 30, "approx_unique": 29, + "null_percentage": 0.0, "avg": -3.5288433333333336, "min": -3.7051, "max": -3.2735, @@ -4310,7 +4311,6 @@ "q50": -3.56235, "q75": -3.4771, "std": 0.10279882155279635, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4321,6 +4321,7 @@ "statistics": { "count": 30, "approx_unique": 31, + "null_percentage": 0.0, "avg": 5.842810000000001, "min": 5.7124, "max": 5.9927, @@ -4328,7 +4329,6 @@ "q50": 5.84455, "q75": 5.8891, "std": 0.06915108072559123, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4339,6 +4339,7 @@ "statistics": { "count": 30, "approx_unique": 32, + "null_percentage": 0.0, "avg": -1.9544200000000003, "min": -2.1053, "max": -1.8188, @@ -4346,7 +4347,6 @@ "q50": -1.9617, "q75": -1.9151, "std": 0.06628655868325775, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4357,6 +4357,7 @@ "statistics": { "count": 30, "approx_unique": 1, + 
"null_percentage": 0.0, "avg": 0.7698999999999999, "min": 0.7699, "max": 0.7699, @@ -4364,7 +4365,6 @@ "q50": 0.7699, "q75": 0.7699, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4375,6 +4375,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.41004000000000024, "min": 0.41003999999999996, "max": 0.41003999999999996, @@ -4382,7 +4383,6 @@ "q50": 0.41003999999999996, "q75": 0.41003999999999996, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4393,6 +4393,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.08278199999999995, "min": 0.08278200000000001, "max": 0.08278200000000001, @@ -4400,7 +4401,6 @@ "q50": 0.08278200000000001, "q75": 0.08278200000000001, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4411,6 +4411,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.4094000000000002, "min": -1.4094, "max": -1.4094, @@ -4418,7 +4419,6 @@ "q50": -1.4094, "q75": -1.4094, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4429,6 +4429,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.7860000000000001, "min": 0.7859999999999999, "max": 0.7859999999999999, @@ -4436,7 +4437,6 @@ "q50": 0.7859999999999999, "q75": 0.7859999999999999, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4447,6 +4447,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.3681300000000001, "min": -0.36813, "max": -0.36813, @@ -4454,7 +4455,6 @@ "q50": -0.36813, "q75": -0.36813, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4465,6 +4465,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4472,7 +4473,6 @@ 
"q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4483,6 +4483,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4490,7 +4491,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4501,6 +4501,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4508,7 +4509,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4519,6 +4519,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4526,7 +4527,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4537,6 +4537,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4544,7 +4545,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4555,6 +4555,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -4562,7 +4563,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4573,6 +4573,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 48.054999999999986, "min": 48.00899999999999, "max": 48.068999999999996, @@ -4580,7 +4581,6 @@ "q50": 48.068999999999996, "q75": 48.068999999999996, "std": 0.025810984029125853, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4591,6 +4591,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 48.01300000000001, "min": 48.00899999999999, "max": 48.068999999999996, @@ -4598,7 +4599,6 @@ "q50": 
48.00899999999999, "q75": 48.00899999999999, "std": 0.015222487902148182, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4609,6 +4609,7 @@ "statistics": { "count": 30, "approx_unique": 6, + "null_percentage": 0.0, "avg": 0.9095060000000003, "min": 0.8593799999999999, "max": 1.0384, @@ -4616,7 +4617,6 @@ "q50": 0.89518, "q75": 0.93099, "std": 0.05115782709521046, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4627,6 +4627,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 47.85499999999999, "min": 47.849, "max": 47.864, @@ -4634,7 +4635,6 @@ "q50": 47.849, "q75": 47.864, "std": 0.007474093186834875, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4645,6 +4645,7 @@ "statistics": { "count": 30, "approx_unique": 3, + "null_percentage": 0.0, "avg": 47.891733333333335, "min": 47.879, "max": 47.91, @@ -4652,7 +4653,6 @@ "q50": 47.895, "q75": 47.895, "std": 0.008665959299163157, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4663,6 +4663,7 @@ "statistics": { "count": 30, "approx_unique": 3, + "null_percentage": 0.0, "avg": 47.793, "min": 47.773, "max": 47.818000000000005, @@ -4670,7 +4671,6 @@ "q50": 47.788000000000004, "q75": 47.818000000000005, "std": 0.019028109877683497, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4681,6 +4681,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 47.82866666666668, "min": 47.818000000000005, "max": 47.833999999999996, @@ -4688,7 +4689,6 @@ "q50": 47.833999999999996, "q75": 47.833999999999996, "std": 0.007671412823760338, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4699,6 +4699,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 47.96193333333334, "min": 47.955, "max": 47.971000000000004, @@ -4706,7 +4707,6 @@ "q50": 47.955, "q75": 47.971000000000004, 
"std": 0.008064110927902024, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4717,6 +4717,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 47.960333333333324, "min": 47.955, "max": 47.971000000000004, @@ -4724,7 +4725,6 @@ "q50": 47.955, "q75": 47.971000000000004, "std": 0.007671412823770011, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4734,13 +4734,14 @@ "test": { "name": "features.test", "path": "pipeline/feature_sets/features.test.parquet", - "column_profile": { + "columns": { "f_x": { "name": "f_x", "role": "target", "statistics": { "count": 30, "approx_unique": 26, + "null_percentage": 0.0, "avg": -10.919066666666668, "min": -11.28, "max": -10.503, @@ -4748,7 +4749,6 @@ "q50": -10.930499999999999, "q75": -10.720999999999998, "std": 0.21154520725519532, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -4759,6 +4759,7 @@ "statistics": { "count": 30, "approx_unique": 26, + "null_percentage": 0.0, "avg": 6.5138, "min": 5.93, "max": 6.83, @@ -4766,7 +4767,6 @@ "q50": 6.57335, "q75": 6.63, "std": 0.21305850095123235, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -4777,6 +4777,7 @@ "statistics": { "count": 30, "approx_unique": 30, + "null_percentage": 0.0, "avg": -7.708129999999998, "min": -7.89, "max": -7.48, @@ -4784,7 +4785,6 @@ "q50": -7.7212, "q75": -7.6182, "std": 0.10587867210584265, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "target" } @@ -4795,6 +4795,7 @@ "statistics": { "count": 30, "approx_unique": 25, + "null_percentage": 0.0, "avg": 0.1524093562359838, "min": -0.06230057569151897, "max": 0.25400403786183595, @@ -4802,7 +4803,6 @@ "q50": 0.15984646375779327, "q75": 0.20339739351712569, "std": 0.0706293760823357, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4813,6 +4813,7 @@ "statistics": { "count": 30, "approx_unique": 21, + "null_percentage": 0.0, "avg": 
0.03241664953586977, "min": -0.29239587203981093, "max": 0.23128641190632715, @@ -4820,7 +4821,6 @@ "q50": 0.04413439313135731, "q75": 0.1006685270806223, "std": 0.1279342283722369, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4831,6 +4831,7 @@ "statistics": { "count": 30, "approx_unique": 23, + "null_percentage": 0.0, "avg": -0.43253947351574656, "min": -0.7587194442079126, "max": 0.1432710290186659, @@ -4838,7 +4839,6 @@ "q50": -0.4596460783923007, "q75": -0.39242367993705063, "std": 0.24088395202452934, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4849,6 +4849,7 @@ "statistics": { "count": 30, "approx_unique": 15, + "null_percentage": 0.0, "avg": 0.04567194367173819, "min": -0.23509573529982242, "max": 0.4466291204984043, @@ -4856,7 +4857,6 @@ "q50": 0.016349979410628174, "q75": 0.14343879457285652, "std": 0.17580667325784777, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4867,6 +4867,7 @@ "statistics": { "count": 30, "approx_unique": 28, + "null_percentage": 0.0, "avg": 0.12023417368176141, "min": -0.10932380846978833, "max": 0.46135286868736036, @@ -4874,7 +4875,6 @@ "q50": 0.09585168094504876, "q75": 0.24087551771746007, "std": 0.16892188587797324, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4885,6 +4885,7 @@ "statistics": { "count": 30, "approx_unique": 24, + "null_percentage": 0.0, "avg": -0.4166301888270923, "min": -0.7796393920510681, "max": -0.03525080894104533, @@ -4892,7 +4893,6 @@ "q50": -0.4128996264196124, "q75": -0.3117040267141978, "std": 0.16347572033952537, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4903,6 +4903,7 @@ "statistics": { "count": 30, "approx_unique": 25, + "null_percentage": 0.0, "avg": 0.918130347325347, "min": 0.6486824075950116, "max": 1.121470461434972, @@ -4910,7 +4911,6 @@ "q50": 0.9228435555856194, "q75": 1.0715823980594366, "std": 0.14842298685041855, - 
"null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4921,6 +4921,7 @@ "statistics": { "count": 30, "approx_unique": 27, + "null_percentage": 0.0, "avg": -0.3567050935918786, "min": -0.7383477819169229, "max": 0.030083152675448463, @@ -4928,7 +4929,6 @@ "q50": -0.32224620438195983, "q75": -0.18380999726655428, "std": 0.2002888273859639, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4939,6 +4939,7 @@ "statistics": { "count": 30, "approx_unique": 13, + "null_percentage": 0.0, "avg": 0.05829206339795194, "min": -0.2198081981954684, "max": 0.17948112893216206, @@ -4946,7 +4947,6 @@ "q50": 0.033101174425396875, "q75": 0.1224927800654327, "std": 0.09523495287817127, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4957,6 +4957,7 @@ "statistics": { "count": 30, "approx_unique": 26, + "null_percentage": 0.0, "avg": 0.12940802492939402, "min": -0.19164798475819547, "max": 0.21368313059326913, @@ -4964,7 +4965,6 @@ "q50": 0.13339141188119694, "q75": 0.1690762967787726, "std": 0.07265840413281201, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4975,6 +4975,7 @@ "statistics": { "count": 30, "approx_unique": 15, + "null_percentage": 0.0, "avg": -0.49923308197242283, "min": -1.3881557621548986, "max": -0.05066862814291171, @@ -4982,7 +4983,6 @@ "q50": -0.4404320352041622, "q75": -0.29391849229971095, "std": 0.3208253139015117, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -4993,6 +4993,7 @@ "statistics": { "count": 30, "approx_unique": 24, + "null_percentage": 0.0, "avg": 0.16156751192846977, "min": 0.004179481300259469, "max": 0.48849840893503926, @@ -5000,7 +5001,6 @@ "q50": 0.0674620082028966, "q75": 0.2970963750329675, "std": 0.15409976759311245, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5011,6 +5011,7 @@ "statistics": { "count": 30, "approx_unique": 24, + "null_percentage": 0.0, "avg": 
-0.5107909840373824, "min": -0.7696245032865707, "max": -0.2667919647640081, @@ -5018,7 +5019,6 @@ "q50": -0.5204723129394886, "q75": -0.3899644028931944, "std": 0.12330020337005919, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5029,6 +5029,7 @@ "statistics": { "count": 30, "approx_unique": 17, + "null_percentage": 0.0, "avg": -0.22393787099431764, "min": -0.34112747590422793, "max": -0.08158031589641734, @@ -5036,7 +5037,6 @@ "q50": -0.28705178698849426, "q75": -0.11235286786962002, "std": 0.09662384468603932, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5047,6 +5047,7 @@ "statistics": { "count": 30, "approx_unique": 19, + "null_percentage": 0.0, "avg": 0.563274819619861, "min": 0.24173017217448634, "max": 0.8464391786647661, @@ -5054,7 +5055,6 @@ "q50": 0.5754017572824353, "q75": 0.6091610906900148, "std": 0.1265117123901637, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5065,6 +5065,7 @@ "statistics": { "count": 30, "approx_unique": 24, + "null_percentage": 0.0, "avg": 0.08817291239996682, "min": -0.07347628465893369, "max": 0.4224675844127627, @@ -5072,7 +5073,6 @@ "q50": 0.08422672772450158, "q75": 0.13447072817772077, "std": 0.0988765069914432, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5083,6 +5083,7 @@ "statistics": { "count": 30, "approx_unique": 24, + "null_percentage": 0.0, "avg": 2.2127116188096356, "min": 1.6677139776810554, "max": 2.7105559608728185, @@ -5090,7 +5091,6 @@ "q50": 2.21764125242753, "q75": 2.5039235973072005, "std": 0.28303943219067185, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5101,6 +5101,7 @@ "statistics": { "count": 30, "approx_unique": 21, + "null_percentage": 0.0, "avg": 0.21082238100376696, "min": -0.021521741825759763, "max": 0.5758245193625116, @@ -5108,7 +5109,6 @@ "q50": 0.19142773671987798, "q75": 0.2965081107847877, "std": 0.12956086883976992, - "null_percentage": 
0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5119,6 +5119,7 @@ "statistics": { "count": 30, "approx_unique": 15, + "null_percentage": 0.0, "avg": -7.473636258432962, "min": -7.7060621499240565, "max": -7.272973962251777, @@ -5126,7 +5127,6 @@ "q50": -7.4486865221524745, "q75": -7.409897130096452, "std": 0.11580130671422861, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5137,6 +5137,7 @@ "statistics": { "count": 30, "approx_unique": 11, + "null_percentage": 0.0, "avg": -0.04675299224970586, "min": -0.34262789316886166, "max": 0.1282850912346699, @@ -5144,7 +5145,6 @@ "q50": -0.02534497036266615, "q75": 0.04721966669048455, "std": 0.13398483070150566, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5155,6 +5155,7 @@ "statistics": { "count": 30, "approx_unique": 21, + "null_percentage": 0.0, "avg": -0.23511552117431594, "min": -0.45715727202993256, "max": 0.17140128546897984, @@ -5162,7 +5163,6 @@ "q50": -0.22639023793816065, "q75": -0.15961375991220164, "std": 0.13414475414084567, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5173,6 +5173,7 @@ "statistics": { "count": 30, "approx_unique": 27, + "null_percentage": 0.0, "avg": 0.08145059913486928, "min": -0.21468467631910326, "max": 0.3000717669791855, @@ -5180,7 +5181,6 @@ "q50": 0.08884513685475545, "q75": 0.13877452381845268, "std": 0.11112937112930686, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5191,6 +5191,7 @@ "statistics": { "count": 30, "approx_unique": 7, + "null_percentage": 0.0, "avg": -0.20391352520774528, "min": -0.696289340651918, "max": 0.07389933055073145, @@ -5198,7 +5199,6 @@ "q50": -0.13404160537759308, "q75": -0.13404160537759308, "std": 0.18248602147099546, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5209,6 +5209,7 @@ "statistics": { "count": 30, "approx_unique": 9, + "null_percentage": 0.0, "avg": -0.14237080497102328, "min": 
-0.287810300831832, "max": 0.010684053390404269, @@ -5216,7 +5217,6 @@ "q50": -0.15991801391544605, "q75": -0.10038447275439095, "std": 0.07615457562111168, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5227,6 +5227,7 @@ "statistics": { "count": 30, "approx_unique": 25, + "null_percentage": 0.0, "avg": -0.07697881371402719, "min": -0.4496618381879538, "max": 0.2859731329653391, @@ -5234,7 +5235,6 @@ "q50": -0.06456103587368225, "q75": -0.006646221384359696, "std": 0.1703789377586864, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5245,6 +5245,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": -0.3562449872317344, "min": -0.3685292971362768, "max": 3.6762017848461976e-16, @@ -5252,7 +5253,6 @@ "q50": -0.3685292971362768, "q75": -0.3685292971362768, "std": 0.06728393638102102, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5263,6 +5263,7 @@ "statistics": { "count": 30, "approx_unique": 13, + "null_percentage": 0.0, "avg": -0.47723873827109115, "min": -0.5646511566087611, "max": -0.3647858362062366, @@ -5270,7 +5271,6 @@ "q50": -0.5027495825792233, "q75": -0.4431217200284206, "std": 0.0609433896224526, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5281,6 +5281,7 @@ "statistics": { "count": 30, "approx_unique": 9, + "null_percentage": 0.0, "avg": -0.0830712134304094, "min": -0.12906911404610488, "max": 0.210618903884933, @@ -5288,7 +5289,6 @@ "q50": -0.12906911404610488, "q75": -0.009095951740138164, "std": 0.081767372523437, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5299,6 +5299,7 @@ "statistics": { "count": 30, "approx_unique": 17, + "null_percentage": 0.0, "avg": -0.28747432059097855, "min": -0.3878998715737425, "max": -0.20029877019826361, @@ -5306,7 +5307,6 @@ "q50": -0.2898674759956072, "q75": -0.2537691424216535, "std": 0.04564915362120373, - "null_percentage": 0.0, 
"column_type": "DOUBLE", "type": "numerical" } @@ -5317,6 +5317,7 @@ "statistics": { "count": 30, "approx_unique": 20, + "null_percentage": 0.0, "avg": 0.07055580995490353, "min": -0.06554862657759826, "max": 0.12421283717173691, @@ -5324,7 +5325,6 @@ "q50": 0.10386319233993987, "q75": 0.11719691210569562, "std": 0.05044761541898693, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5335,6 +5335,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 3.409800000000001, "min": 3.4098, "max": 3.4098, @@ -5342,7 +5343,6 @@ "q50": 3.4098, "q75": 3.4098, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5353,6 +5353,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.3273700000000001, "min": -0.32737, "max": -0.32737, @@ -5360,7 +5361,6 @@ "q50": -0.32737, "q75": -0.32737, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5371,6 +5371,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.9604399999999994, "min": 0.96044, "max": 0.96044, @@ -5378,7 +5379,6 @@ "q50": 0.96044, "q75": 0.96044, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5389,6 +5389,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -3.7436000000000003, "min": -3.7436, "max": -3.7436, @@ -5396,7 +5397,6 @@ "q50": -3.7436, "q75": -3.7436, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5407,6 +5407,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.0191000000000008, "min": -1.0191, "max": -1.0191, @@ -5414,7 +5415,6 @@ "q50": -1.0191, "q75": -1.0191, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5425,6 +5425,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 
-6.020499999999999, "min": -6.0205, "max": -6.0205, @@ -5432,7 +5433,6 @@ "q50": -6.0205, "q75": -6.0205, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5443,6 +5443,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5450,7 +5451,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5461,6 +5461,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5468,7 +5469,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5479,6 +5479,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5486,7 +5487,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5497,6 +5497,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5504,7 +5505,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5515,6 +5515,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5522,7 +5523,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5533,6 +5533,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5540,7 +5541,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5551,6 +5551,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5558,7 +5559,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, 
"column_type": "DOUBLE", "type": "numerical" } @@ -5569,6 +5569,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5576,7 +5577,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5587,6 +5587,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5594,7 +5595,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5605,6 +5605,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5612,7 +5613,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5623,6 +5623,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5630,7 +5631,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5641,6 +5641,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5648,7 +5649,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5659,6 +5659,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 8.380399999999998e-17, "min": 8.3804e-17, "max": 8.3804e-17, @@ -5666,7 +5667,6 @@ "q50": 8.3804e-17, "q75": 8.3804e-17, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5677,6 +5677,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -4.811599999999999, "min": -4.8116, "max": -4.8116, @@ -5684,7 +5685,6 @@ "q50": -4.8116, "q75": -4.8116, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5695,6 +5695,7 @@ 
"statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.4033000000000009, "min": -1.4033, "max": -1.4033, @@ -5702,7 +5703,6 @@ "q50": -1.4033, "q75": -1.4033, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5713,6 +5713,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.13685999999999998, "min": -0.13685999999999998, "max": -0.13685999999999998, @@ -5720,7 +5721,6 @@ "q50": -0.13685999999999998, "q75": -0.13685999999999998, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5731,6 +5731,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0024718999999999995, "min": 0.0024719, "max": 0.0024719, @@ -5738,7 +5739,6 @@ "q50": 0.0024719, "q75": 0.0024719, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5749,6 +5749,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5756,7 +5757,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5767,6 +5767,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 9.802700000000004e-16, "min": 9.8027e-16, "max": 9.8027e-16, @@ -5774,7 +5775,6 @@ "q50": 9.8027e-16, "q75": 9.8027e-16, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5785,6 +5785,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -55.64200000000003, "min": -55.641999999999996, "max": -55.641999999999996, @@ -5792,7 +5793,6 @@ "q50": -55.641999999999996, "q75": -55.641999999999996, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5803,6 +5803,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -16.31200000000001, "min": -16.312, "max": 
-16.312, @@ -5810,7 +5811,6 @@ "q50": -16.312, "q75": -16.312, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5821,6 +5821,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.2042, "min": -1.2042, "max": -1.2042, @@ -5828,7 +5829,6 @@ "q50": -1.2042, "q75": -1.2042, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5839,6 +5839,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.021675, "min": 0.021675, "max": 0.021675, @@ -5846,7 +5847,6 @@ "q50": 0.021675, "q75": 0.021675, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5857,6 +5857,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5864,7 +5865,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5875,6 +5875,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 3.409780000000001, "min": 3.4097, "max": 3.4098, @@ -5882,7 +5883,6 @@ "q50": 3.4098, "q75": 3.4098, "std": 0.00004068381021726342, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5893,6 +5893,7 @@ "statistics": { "count": 30, "approx_unique": 6, + "null_percentage": 0.0, "avg": -0.3273716666666667, "min": -0.3274, "max": -0.32734, @@ -5900,7 +5901,6 @@ "q50": -0.32737, "q75": -0.32736, "std": 0.00001895245108948268, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5911,6 +5911,7 @@ "statistics": { "count": 30, "approx_unique": 6, + "null_percentage": 0.0, "avg": 0.960428, "min": 0.9603799999999999, "max": 0.96045, @@ -5918,7 +5919,6 @@ "q50": 0.96043, "q75": 0.96044, "std": 0.000019190514898497155, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5929,6 +5929,7 @@ "statistics": { "count": 30, "approx_unique": 1, 
+ "null_percentage": 0.0, "avg": -3.743700000000001, "min": -3.7437, "max": -3.7437, @@ -5936,7 +5937,6 @@ "q50": -3.7437, "q75": -3.7437, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5947,6 +5947,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.0191000000000008, "min": -1.0191, "max": -1.0191, @@ -5954,7 +5955,6 @@ "q50": -1.0191, "q75": -1.0191, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5965,6 +5965,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -6.020499999999999, "min": -6.0205, "max": -6.0205, @@ -5972,7 +5973,6 @@ "q50": -6.0205, "q75": -6.0205, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -5983,6 +5983,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -5990,7 +5991,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6001,6 +6001,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6008,7 +6009,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6019,6 +6019,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": -0.0, "max": -0.0, @@ -6026,7 +6027,6 @@ "q50": -0.0, "q75": -0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6037,6 +6037,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6044,7 +6045,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6055,6 +6055,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, 
"max": 0.0, @@ -6062,7 +6063,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6073,6 +6073,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6080,7 +6081,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6091,6 +6091,7 @@ "statistics": { "count": 30, "approx_unique": 14, + "null_percentage": 0.0, "avg": 0.11649966666666664, "min": 0.10088, "max": 0.13227, @@ -6098,7 +6099,6 @@ "q50": 0.11657000000000001, "q75": 0.12105999999999999, "std": 0.00841944812809065, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6109,6 +6109,7 @@ "statistics": { "count": 30, "approx_unique": 10, + "null_percentage": 0.0, "avg": -6.5501966666666664, "min": -6.5663, "max": -6.5416, @@ -6116,7 +6117,6 @@ "q50": -6.5506, "q75": -6.5461, "std": 0.0058114917862462125, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6127,6 +6127,7 @@ "statistics": { "count": 30, "approx_unique": 12, + "null_percentage": 0.0, "avg": -2.815493333333334, "min": -2.8336, "max": -2.8023, @@ -6134,7 +6135,6 @@ "q50": -2.8157, "q75": -2.8089999999999997, "std": 0.007310405117947096, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6145,6 +6145,7 @@ "statistics": { "count": 30, "approx_unique": 12, + "null_percentage": 0.0, "avg": -0.8309449999999998, "min": -0.84335, "max": -0.8220000000000001, @@ -6152,7 +6153,6 @@ "q50": -0.83115, "q75": -0.82657, "std": 0.005530404206715397, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6163,6 +6163,7 @@ "statistics": { "count": 30, "approx_unique": 13, + "null_percentage": 0.0, "avg": 0.07035533333333334, "min": 0.062527, "max": 0.082352, @@ -6170,7 +6171,6 @@ "q50": 0.07015199999999999, "q75": 0.073202, "std": 0.004801363551536481, - "null_percentage": 0.0, 
"column_type": "DOUBLE", "type": "numerical" } @@ -6181,6 +6181,7 @@ "statistics": { "count": 30, "approx_unique": 20, + "null_percentage": 0.0, "avg": -0.19586799999999996, "min": -0.20893, "max": -0.18148, @@ -6188,7 +6189,6 @@ "q50": -0.19826, "q75": -0.18911, "std": 0.006909548315992475, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6199,6 +6199,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.11881999999999995, "min": 0.11882000000000001, "max": 0.11882000000000001, @@ -6206,7 +6207,6 @@ "q50": 0.11882000000000001, "q75": 0.11882000000000001, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6217,6 +6217,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -6.548300000000003, "min": -6.5483, "max": -6.5483, @@ -6224,7 +6225,6 @@ "q50": -6.5483, "q75": -6.5483, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6235,6 +6235,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -2.8157000000000014, "min": -2.8157, "max": -2.8157, @@ -6242,7 +6243,6 @@ "q50": -2.8157, "q75": -2.8157, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6253,6 +6253,7 @@ "statistics": { "count": 30, "approx_unique": 5, + "null_percentage": 0.0, "avg": -0.8294699999999999, "min": -0.83267, "max": -0.82657, @@ -6260,7 +6261,6 @@ "q50": -0.8281, "q75": -0.8281, "std": 0.0023114706469527574, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6271,6 +6271,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.070152, "min": 0.07015199999999999, "max": 0.07015199999999999, @@ -6278,7 +6279,6 @@ "q50": 0.07015199999999999, "q75": 0.07015199999999999, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6289,6 +6289,7 @@ "statistics": { "count": 
30, "approx_unique": 5, + "null_percentage": 0.0, "avg": -0.1961233333333334, "min": -0.19826, "max": -0.19216, @@ -6296,7 +6297,6 @@ "q50": -0.19826, "q75": -0.19368, "std": 0.002457224862113459, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6307,6 +6307,7 @@ "statistics": { "count": 30, "approx_unique": 3, + "null_percentage": 0.0, "avg": 0.7699063333333331, "min": 0.7699, "max": 0.7699199999999999, @@ -6314,7 +6315,6 @@ "q50": 0.76991, "q75": 0.76991, "std": 5.560534167661252e-6, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6325,6 +6325,7 @@ "statistics": { "count": 30, "approx_unique": 5, + "null_percentage": 0.0, "avg": 0.41001499999999996, "min": 0.41, "max": 0.41003999999999996, @@ -6332,7 +6333,6 @@ "q50": 0.410015, "q75": 0.41002, "std": 0.000011371470653687081, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6343,6 +6343,7 @@ "statistics": { "count": 30, "approx_unique": 28, + "null_percentage": 0.0, "avg": 0.08279083333333333, "min": 0.082758, "max": 0.082817, @@ -6350,7 +6351,6 @@ "q50": 0.08279249999999999, "q75": 0.082804, "std": 0.000016772480196858457, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6361,6 +6361,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": -1.4093633333333337, "min": -1.4094, "max": -1.4093, @@ -6368,7 +6369,6 @@ "q50": -1.4094, "q75": -1.4093, "std": 0.000049013251785353614, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6379,6 +6379,7 @@ "statistics": { "count": 30, "approx_unique": 10, + "null_percentage": 0.0, "avg": 0.7859533333333333, "min": 0.7859, "max": 0.7859999999999999, @@ -6386,7 +6387,6 @@ "q50": 0.78596, "q75": 0.78597, "std": 0.000026565901573916655, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6397,6 +6397,7 @@ "statistics": { "count": 30, "approx_unique": 9, + "null_percentage": 0.0, "avg": 
-0.36821200000000004, "min": -0.36827, "max": -0.36818, @@ -6404,7 +6405,6 @@ "q50": -0.36821, "q75": -0.36819, "std": 0.000023252734359046097, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6415,6 +6415,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6422,7 +6423,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6433,6 +6433,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6440,7 +6441,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6451,6 +6451,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6458,7 +6459,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6469,6 +6469,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6476,7 +6477,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6487,6 +6487,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6494,7 +6495,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6505,6 +6505,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6512,7 +6513,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6523,6 +6523,7 @@ "statistics": { "count": 30, "approx_unique": 29, + "null_percentage": 0.0, "avg": -22.271400000000003, "min": -23.2, "max": -21.288, @@ -6530,7 +6531,6 @@ "q50": -22.372, 
"q75": -21.839000000000002, "std": 0.5428036159765707, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6541,6 +6541,7 @@ "statistics": { "count": 30, "approx_unique": 35, + "null_percentage": 0.0, "avg": -11.274933333333333, "min": -11.845999999999998, "max": -10.527999999999999, @@ -6548,7 +6549,6 @@ "q50": -11.270499999999998, "q75": -11.225999999999999, "std": 0.2594811711105828, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6559,6 +6559,7 @@ "statistics": { "count": 30, "approx_unique": 32, + "null_percentage": 0.0, "avg": -18.592833333333335, "min": -18.939, "max": -18.345, @@ -6566,7 +6567,6 @@ "q50": -18.604, "q75": -18.46, "std": 0.14494566956772725, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6577,6 +6577,7 @@ "statistics": { "count": 30, "approx_unique": 30, + "null_percentage": 0.0, "avg": -3.5490866666666667, "min": -3.6931, "max": -3.3167, @@ -6584,7 +6585,6 @@ "q50": -3.56555, "q75": -3.4866, "std": 0.10078614003417581, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6595,6 +6595,7 @@ "statistics": { "count": 30, "approx_unique": 32, + "null_percentage": 0.0, "avg": 5.835456666666667, "min": 5.7443, "max": 5.9639, @@ -6602,7 +6603,6 @@ "q50": 5.8376, "q75": 5.8786, "std": 0.06015949309290594, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6613,6 +6613,7 @@ "statistics": { "count": 30, "approx_unique": 27, + "null_percentage": 0.0, "avg": -1.9739266666666664, "min": -2.1362, "max": -1.845, @@ -6620,7 +6621,6 @@ "q50": -1.97865, "q75": -1.9192, "std": 0.07078052008136494, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6631,6 +6631,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.7698999999999999, "min": 0.7699, "max": 0.7699, @@ -6638,7 +6639,6 @@ "q50": 0.7699, "q75": 0.7699, "std": 0.0, - "null_percentage": 0.0, "column_type": 
"DOUBLE", "type": "numerical" } @@ -6649,6 +6649,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.41004000000000024, "min": 0.41003999999999996, "max": 0.41003999999999996, @@ -6656,7 +6657,6 @@ "q50": 0.41003999999999996, "q75": 0.41003999999999996, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6667,6 +6667,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.08278199999999995, "min": 0.08278200000000001, "max": 0.08278200000000001, @@ -6674,7 +6675,6 @@ "q50": 0.08278200000000001, "q75": 0.08278200000000001, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6685,6 +6685,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -1.4094000000000002, "min": -1.4094, "max": -1.4094, @@ -6692,7 +6693,6 @@ "q50": -1.4094, "q75": -1.4094, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6703,6 +6703,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.7860000000000001, "min": 0.7859999999999999, "max": 0.7859999999999999, @@ -6710,7 +6711,6 @@ "q50": 0.7859999999999999, "q75": 0.7859999999999999, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6721,6 +6721,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": -0.3681300000000001, "min": -0.36813, "max": -0.36813, @@ -6728,7 +6729,6 @@ "q50": -0.36813, "q75": -0.36813, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6739,6 +6739,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6746,7 +6747,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6757,6 +6757,7 @@ "statistics": { "count": 30, "approx_unique": 1, + 
"null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6764,7 +6765,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6775,6 +6775,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6782,7 +6783,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6793,6 +6793,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6800,7 +6801,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6811,6 +6811,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6818,7 +6819,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6829,6 +6829,7 @@ "statistics": { "count": 30, "approx_unique": 1, + "null_percentage": 0.0, "avg": 0.0, "min": 0.0, "max": 0.0, @@ -6836,7 +6837,6 @@ "q50": 0.0, "q75": 0.0, "std": 0.0, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6847,6 +6847,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 48.04499999999999, "min": 48.00899999999999, "max": 48.068999999999996, @@ -6854,7 +6855,6 @@ "q50": 48.068999999999996, "q75": 48.068999999999996, "std": 0.02989637274734736, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6865,6 +6865,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 48.019, "min": 48.00899999999999, "max": 48.068999999999996, @@ -6872,7 +6873,6 @@ "q50": 48.00899999999999, "q75": 48.00899999999999, "std": 0.022742941307369282, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6883,6 +6883,7 @@ "statistics": { "count": 30, 
"approx_unique": 7, + "null_percentage": 0.0, "avg": 0.9214410000000002, "min": 0.82357, "max": 1.0742, @@ -6890,7 +6891,6 @@ "q50": 0.93099, "q75": 0.93099, "std": 0.054774079028754794, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6901,6 +6901,7 @@ "statistics": { "count": 30, "approx_unique": 4, + "null_percentage": 0.0, "avg": 47.832666666666675, "min": 47.803000000000004, "max": 47.864, @@ -6908,7 +6909,6 @@ "q50": 47.833999999999996, "q75": 47.864, "std": 0.024358434541922255, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6919,6 +6919,7 @@ "statistics": { "count": 30, "approx_unique": 3, + "null_percentage": 0.0, "avg": 47.89466666666665, "min": 47.879, "max": 47.91, @@ -6926,7 +6927,6 @@ "q50": 47.895, "q75": 47.91, "std": 0.012874281589150457, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6937,6 +6937,7 @@ "statistics": { "count": 30, "approx_unique": 3, + "null_percentage": 0.0, "avg": 47.81966666666665, "min": 47.788000000000004, "max": 47.833999999999996, @@ -6944,7 +6945,6 @@ "q50": 47.833999999999996, "q75": 47.833999999999996, "std": 0.021283526628521798, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6955,6 +6955,7 @@ "statistics": { "count": 30, "approx_unique": 3, + "null_percentage": 0.0, "avg": 47.829166666666666, "min": 47.818000000000005, "max": 47.849, @@ -6962,7 +6963,6 @@ "q50": 47.833999999999996, "q75": 47.833999999999996, "std": 0.008477488784531828, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6973,6 +6973,7 @@ "statistics": { "count": 30, "approx_unique": 2, + "null_percentage": 0.0, "avg": 47.96033333333333, "min": 47.955, "max": 47.971000000000004, @@ -6980,7 +6981,6 @@ "q50": 47.955, "q75": 47.971000000000004, "std": 0.0076714128237685185, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -6991,6 +6991,7 @@ "statistics": { "count": 30, "approx_unique": 2, 
+ "null_percentage": 0.0, "avg": 47.96033333333333, "min": 47.955, "max": 47.971000000000004, @@ -6998,7 +6999,6 @@ "q50": 47.955, "q75": 47.971000000000004, "std": 0.007671412823767137, - "null_percentage": 0.0, "column_type": "DOUBLE", "type": "numerical" } @@ -7020,19 +7020,19 @@ "num_threads": 0, "propositionalization": { "aggregation": [ - "MEDIAN", + "COUNT DISTINCT", "STDDEV", - "COUNT", "AVG", - "TREND", - "COUNT DISTINCT", - "SUM", "FIRST", - "MODE", - "LAST", + "MEDIAN", + "COUNT", "COUNT MINUS COUNT DISTINCT", - "MAX", - "MIN" + "LAST", + "MIN", + "MODE", + "TREND", + "SUM", + "MAX" ], "delta_t": 0.0, "loss_function": "SquareLoss", @@ -7108,7 +7108,7 @@ "preprocessors": [], "share_selected_features": 0.5, "tags": [ - "container-0ENVoV" + "container-nzpwqU" ], "targets": [ "f_x", @@ -8354,7 +8354,7 @@ }, "scores": [ { - "date_time": "2025-08-19T22:33:26", + "date_time": "2025-08-22T16:36:30", "set_used": "train", "target": "f_x", "mae": 0.010792690446641749, @@ -8363,7 +8363,7 @@ "type": "regression" }, { - "date_time": "2025-08-19T22:33:26", + "date_time": "2025-08-22T16:36:30", "set_used": "train", "target": "f_y", "mae": 0.008576129904852969, @@ -8372,7 +8372,7 @@ "type": "regression" }, { - "date_time": "2025-08-19T22:33:26", + "date_time": "2025-08-22T16:36:30", "set_used": "train", "target": "f_z", "mae": 0.007760050167507584, @@ -8381,7 +8381,7 @@ "type": "regression" }, { - "date_time": "2025-08-19T22:33:35", + "date_time": "2025-08-22T16:36:41", "set_used": "test", "target": "f_x", "mae": 0.19983083674112964, @@ -8390,7 +8390,7 @@ "type": "regression" }, { - "date_time": "2025-08-19T22:33:35", + "date_time": "2025-08-22T16:36:41", "set_used": "test", "target": "f_y", "mae": 0.17922503092447914, @@ -8399,7 +8399,7 @@ "type": "regression" }, { - "date_time": "2025-08-19T22:33:35", + "date_time": "2025-08-22T16:36:41", "set_used": "test", "target": "f_z", "mae": 0.1051623312123616, @@ -8408,7 +8408,7 @@ "type": "regression" }, { - "date_time": 
"2025-08-19T22:33:36", + "date_time": "2025-08-22T16:36:42", "set_used": "validation", "target": "f_x", "mae": 0.37266234512329094, @@ -8417,7 +8417,7 @@ "type": "regression" }, { - "date_time": "2025-08-19T22:33:36", + "date_time": "2025-08-22T16:36:42", "set_used": "validation", "target": "f_y", "mae": 0.2962504494476319, @@ -8426,7 +8426,7 @@ "type": "regression" }, { - "date_time": "2025-08-19T22:33:36", + "date_time": "2025-08-22T16:36:42", "set_used": "validation", "target": "f_z", "mae": 0.2142715495045979, diff --git a/tests/integration/data/robot/robot.py b/tests/integration/data/robot/robot.py index ab883c8..ff071a6 100644 --- a/tests/integration/data/robot/robot.py +++ b/tests/integration/data/robot/robot.py @@ -26,7 +26,7 @@ logger: logging.Logger = logging.getLogger(__name__) -class RobotProject(GetMLProject): +class RobotProject(GetMLProject, frozen=True): pass diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 860982e..4cfe85a 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,6 +1,9 @@ import copy import re -from collections.abc import Mapping, Sequence +from collections.abc import ( + Mapping, + Sequence, +) from datetime import datetime, timezone from pathlib import Path from typing import Any, Protocol @@ -16,6 +19,7 @@ from getml import predictors as getml_predictor from getml import preprocessors as getml_preprocessor from getml.data import Container, DataFrame, Placeholder, Subset, View +from getml.data.roles.types import Role as GetMLRole from getml.feature_learning.loss_functions import CROSSENTROPYLOSS from getml.pipeline import Features as GetMLFeatures from getml.pipeline import Pipeline @@ -37,8 +41,9 @@ from getml_io.metadata.container_information import ContainerInformation from getml_io.metadata.data_model_information import DataModelInformation from getml_io.metadata.dataframe_information import ( - ColumnProfile, + ColumnInformation, ColumnStatisticsNumerical, + ColumnType, DataFrameInformation, ) 
from getml_io.metadata.pipeline_information import LossFunction, PipelineInformation @@ -47,6 +52,10 @@ PlaceholderInformation, ) from getml_io.utils.convert import assume_is_str +from tests.unit.types import ( + ColumnInformationType, + StatisticsType, +) @pytest.fixture @@ -146,8 +155,8 @@ def container_information_empty() -> ContainerInformation: @pytest.fixture -def column_profile_default() -> ColumnProfile: - return ColumnProfile( +def column_information_default() -> ColumnInformation: + return ColumnInformation( name="default", role=Role.NUMERICAL, statistics=ColumnStatisticsNumerical( @@ -161,7 +170,7 @@ def column_profile_default() -> ColumnProfile: q75=1, std=1, null_percentage=1, - column_type="DOUBLE", + column_type=ColumnType.DOUBLE, ), ) @@ -188,13 +197,13 @@ def subsets_path(container_path: Path) -> Path: @pytest.fixture def dataframe_information_population( - column_profile_default: ColumnProfile, + column_information_default: ColumnInformation, population_path: Path, ) -> DataFrameInformation: return DataFrameInformation( name="population", path=population_path / "population.parquet", - column_profile={"default": column_profile_default}, + columns={"default": column_information_default}, ) @@ -215,25 +224,25 @@ def feature_sets_path(pipeline_path: Path) -> Path: @pytest.fixture def dataframe_information_peripheral( - column_profile_default: ColumnProfile, + column_information_default: ColumnInformation, peripheral_path: Path, ) -> DataFrameInformation: return DataFrameInformation( name="peripheral", path=peripheral_path / "peripheral.parquet", - column_profile={"default": column_profile_default}, + columns={"default": column_information_default}, ) @pytest.fixture def dataframe_information_subset( tmp_path: Path, - column_profile_default: ColumnProfile, + column_information_default: ColumnInformation, ) -> DataFrameInformation: return DataFrameInformation( name="subset", path=tmp_path / "subset.parquet", - column_profile={"default": 
column_profile_default}, + columns={"default": column_information_default}, ) @@ -260,6 +269,23 @@ def container_information( ) +def _role_by_name(name: str) -> GetMLRole: + for role in ( + getml_roles.categorical, + getml_roles.join_key, + getml_roles.numerical, + getml_roles.target, + getml_roles.text, + getml_roles.time_stamp, + getml_roles.unused_float, + getml_roles.unused_string, + ): + if name.startswith(role): + return role + + return getml_roles.numerical + + @pytest.fixture def mock_dataframe( mocker: pytest_mock.MockerFixture, @@ -267,21 +293,17 @@ def mock_dataframe( dataframe = mocker.Mock(DataFrame) dataframe.name = "mock_dataframe_name" dataframe.columns = [ - "Categorical0", - "Categorical1", - "Categorical2", - "Categorical3", - "Numerical0", - "Numerical1", - "Numerical2", - "Numerical3", + "categorical0", + "join_key0", + "numerical0", + "target0", + "text0", + "time_stamp0", + "unused_float0", + "unused_string0", ] dataframe.roles = mocker.MagicMock() - dataframe.roles.column = ( - lambda name: getml_roles.categorical # pyright: ignore [reportUnknownLambdaType] - if name.startswith("Categorical") # pyright: ignore [reportUnknownMemberType] - else getml_roles.numerical - ) + dataframe.roles.column = _role_by_name return dataframe @@ -294,37 +316,37 @@ def mock_dataframe_train(mock_dataframe: DataFrame) -> DataFrame: @pytest.fixture def dataframe_information_train( - column_profile_default: ColumnProfile, + column_information_default: ColumnInformation, subsets_path: Path, ) -> DataFrameInformation: return DataFrameInformation( name="dataframe_train", path=subsets_path / "dataframe_train.parquet", - column_profile={"default": column_profile_default}, + columns={"default": column_information_default}, ) @pytest.fixture def dataframe_information_test( - column_profile_default: ColumnProfile, + column_information_default: ColumnInformation, subsets_path: Path, ) -> DataFrameInformation: return DataFrameInformation( name="dataframe_test", 
path=subsets_path / "dataframe_test.parquet", - column_profile={"default": column_profile_default}, + columns={"default": column_information_default}, ) @pytest.fixture def dataframe_information_validation( - column_profile_default: ColumnProfile, + column_information_default: ColumnInformation, subsets_path: Path, ) -> DataFrameInformation: return DataFrameInformation( name="dataframe_validation", path=subsets_path / "dataframe_validation.parquet", - column_profile={"default": column_profile_default}, + columns={"default": column_information_default}, ) @@ -345,12 +367,12 @@ def mock_dataframe_validation(mock_dataframe: DataFrame) -> DataFrame: @pytest.fixture def dataframe_information( tmp_path: Path, - column_profile_default: ColumnProfile, + column_information_default: ColumnInformation, ) -> DataFrameInformation: return DataFrameInformation( name="dataframe_name", path=tmp_path / "dataframe_name.parquet", - column_profile={"default": column_profile_default}, + columns={"default": column_information_default}, ) @@ -417,9 +439,9 @@ def pipeline_information_empty( @pytest.fixture def ndarray() -> NDArray[np.float64]: return np.array( - [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]], + [[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.0]], dtype=np.float64, - ) + ).T @pytest.fixture @@ -428,7 +450,7 @@ def mock_features(mocker: pytest_mock.MockerFixture) -> GetMLFeatures: feature = mocker.MagicMock(GetMLFeature) feature.name = "test_feature" feature.index = 0 - feature.target = "test_target" + feature.target = "target0" feature.importance = 0.5 feature.correlation = 0.1 feature.sql = "SELECT * FROM test_table" @@ -442,13 +464,15 @@ def mock_scores_regression(mocker: pytest_mock.MockerFixture) -> GetMLScores: regression_score = mocker.MagicMock(GetMLRegressionScore) regression_score.date_time = datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc) regression_score.set_used = "test_set" - regression_score.target = "test_target" + regression_score.target = "target0" 
regression_score.mae = 0.1 regression_score.rmse = 0.2 regression_score.rsquared = 0.8 - scores.__iter__.return_value = iter([ # pyright: ignore [reportAny] - regression_score, - ]) + scores.__iter__.return_value = iter( # pyright: ignore [reportAny] + [ + regression_score, + ], + ) scores.is_classification = False scores.is_regression = True return scores @@ -460,13 +484,15 @@ def mock_scores_classification(mocker: pytest_mock.MockerFixture) -> GetMLScores classification_score = mocker.MagicMock(GetMLClassificationScore) classification_score.date_time = datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc) classification_score.set_used = "test_set" - classification_score.target = "test_target" + classification_score.target = "target0" classification_score.accuracy = 0.1 classification_score.auc = 0.2 classification_score.cross_entropy = 0.8 - scores.__iter__.return_value = iter([ # pyright: ignore [reportAny] - classification_score, - ]) + scores.__iter__.return_value = iter( # pyright: ignore [reportAny] + [ + classification_score, + ], + ) scores.is_classification = True scores.is_regression = False return scores @@ -512,7 +538,7 @@ def pipeline_transform(_: DataFrame | View | Subset, *, df_name: str) -> DataFra pipeline.is_regression = True pipeline.share_selected_features = 0.0 pipeline.tags = ["test_tag"] - pipeline.targets = ["test_target"] + pipeline.targets = ["target0"] pipeline.features = mock_features pipeline.scores = mock_scores_regression return pipeline @@ -532,25 +558,25 @@ def mock_project_empty( @pytest.fixture def dataframe_information_features_test( - column_profile_default: ColumnProfile, + column_information_default: ColumnInformation, feature_sets_path: Path, ) -> DataFrameInformation: return DataFrameInformation( name="features.test", path=feature_sets_path / "features.test.parquet", - column_profile={"default": column_profile_default}, + columns={"default": column_information_default}, ) @pytest.fixture def 
dataframe_information_features_validation( - column_profile_default: ColumnProfile, + column_information_default: ColumnInformation, feature_sets_path: Path, ) -> DataFrameInformation: return DataFrameInformation( name="features.validation", path=feature_sets_path / "features.validation.parquet", - column_profile={"default": column_profile_default}, + columns={"default": column_information_default}, ) @@ -635,7 +661,7 @@ def features() -> Features: "feature_1_1": Feature( name="feature_1_1", index=0, - target="test_target", + target="target0", importance=0.01357680886229068, correlation=-0.13123721580660005, sql='DROP TABLE IF EXISTS "FEATURE_1_1";\n\nCREATE TABLE "FEATURE_1_1" AS\nSELECT AVG( \n CASE\n WHEN ( t2."50" > -0.203234 ) AND ( t2."69" > -21.775444 ) AND ( t1."48" > -0.835814 ) THEN 1.044429922263974\n WHEN ( t2."50" > -0.203234 ) AND ( t2."69" > -21.775444 ) AND ( t1."48" <= -0.835814 OR t1."48" IS NULL ) THEN 1.59523047916995\n WHEN ( t2."50" > -0.203234 ) AND ( t2."69" <= -21.775444 OR t2."69" IS NULL ) AND ( t2."46" > -6.542175 ) THEN 0.7399658893921874\n WHEN ( t2."50" > -0.203234 ) AND ( t2."69" <= -21.775444 OR t2."69" IS NULL ) AND ( t2."46" <= -6.542175 OR t2."46" IS NULL ) THEN -0.01913377577846733\n WHEN ( t2."50" <= -0.203234 OR t2."50" IS NULL ) AND ( t1."71" > -18.316600 ) AND ( t2."35" > 0.960430 ) THEN -5.809133775778446\n WHEN ( t2."50" <= -0.203234 OR t2."50" IS NULL ) AND ( t1."71" > -18.316600 ) AND ( t2."35" <= 0.960430 OR t2."35" IS NULL ) THEN 9.641016436804664\n WHEN ( t2."50" <= -0.203234 OR t2."50" IS NULL ) AND ( t1."71" <= -18.316600 OR t1."71" IS NULL ) AND ( t1."70" > -10.965400 ) THEN -2.051889230264734\n WHEN ( t2."50" <= -0.203234 OR t2."50" IS NULL ) AND ( t1."71" <= -18.316600 OR t1."71" IS NULL ) AND ( t1."70" <= -10.965400 OR t1."70" IS NULL ) THEN -0.8315943548443633\n ELSE NULL\n END\n) AS "feature_1_1",\n t1.rowid AS rownum\nFROM "POPULATION__STAGING_TABLE_1" t1\nINNER JOIN "FULL__STAGING_TABLE_2" t2\nON 1 = 
1\nWHERE t2."rowid" <= t1."rowid"\nAND ( t2."rowid__30_000000" > t1."rowid" OR t2."rowid__30_000000" IS NULL )\nGROUP BY t1.rowid;', # noqa: E501 @@ -643,7 +669,7 @@ def features() -> Features: "3": Feature( name="3", index=1, - target="test_target", + target="target0", importance=0.0, correlation=0.0, sql="", @@ -651,7 +677,7 @@ def features() -> Features: "column_01": Feature( name="column_01", index=2, - target="test_target", + target="target0", importance=1.144393770273378e-6, correlation=-0.09593210355580734, sql="", @@ -659,7 +685,7 @@ def features() -> Features: "duration": Feature( name="duration", index=3, - target="test_target", + target="target0", importance=0.0005039782195063603, correlation=-0.0271918442877901, sql="", @@ -673,7 +699,7 @@ def scores() -> Scores: ClassificationScore( date_time=datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc), set_used="test_set", - target="test_target", + target="target0", accuracy=0.9, auc=0.95, cross_entropy=0.05, @@ -736,49 +762,77 @@ def pipeline_information( # noqa: PLR0913 preprocessors=[category_trimmer], share_selected_features=0.0, tags=["test_tag"], - targets=["test_target"], + targets=["target0"], data_model=data_model_information, features=features, scores=scores, ) -def generate_raw_summary_statistics_pd(dataframe: DataFrame | View) -> pd.DataFrame: - data = { - "column_name": dict(enumerate(dataframe.columns)), - "column_type": { - index: "VARCHAR" - if ( - dataframe.roles.column(name) - in {getml_roles.categorical, getml_roles.text} - ) - else "DOUBLE" - for index, name in enumerate(dataframe.columns) - }, - "min": { - index: "" - if dataframe.roles.column(name) == getml_roles.categorical - else 0.0 - for index, name in enumerate(dataframe.columns) - }, - "max": { - index: "" - if dataframe.roles.column(name) == getml_roles.categorical - else 0.0 - for index, name in enumerate(dataframe.columns) - }, - "approx_unique": dict.fromkeys(range(len(dataframe.columns)), 0), - "avg": 
dict.fromkeys(range(len(dataframe.columns)), float("nan")), - "std": dict.fromkeys(range(len(dataframe.columns)), float("nan")), - "q25": dict.fromkeys(range(len(dataframe.columns)), float("nan")), - "q50": dict.fromkeys(range(len(dataframe.columns)), float("nan")), - "q75": dict.fromkeys(range(len(dataframe.columns)), float("nan")), - "count": dict.fromkeys(range(len(dataframe.columns)), 0), - "null_percentage": dict.fromkeys( - range(len(dataframe.columns)), - float("nan"), - ), +def _column_summary_statistics_varchar( + name: str, +) -> dict[str, str | int | float | None]: + return { + "column_name": name, + "column_type": ColumnType.VARCHAR.value, + "count": 0, + "approx_unique": 0, + "null_percentage": float("nan"), + "min": "", + "max": "", + "avg": None, + "std": None, + "q25": None, + "q50": None, + "q75": None, + } + + +def _column_summary_statistics_double(name: str) -> dict[str, str | int | float | None]: + return { + "column_name": name, + "column_type": ColumnType.DOUBLE.value, + "count": 0, + "approx_unique": 0, + "null_percentage": float("nan"), + "min": 0.0, + "max": 0.0, + "avg": float("nan"), + "std": float("nan"), + "q25": float("nan"), + "q50": float("nan"), + "q75": float("nan"), } + + +def _generate_raw_summary_statistics_for_column( + dataframe: DataFrame | View, + name: str, +) -> dict[str, str | int | float | None]: + if dataframe.roles.column(name) in { + getml_roles.categorical, + getml_roles.join_key, + getml_roles.text, + getml_roles.unused_string, + }: + return _column_summary_statistics_varchar(name) + + if dataframe.roles.column(name) in { + getml_roles.numerical, + getml_roles.target, + getml_roles.time_stamp, + getml_roles.unused_float, + }: + return _column_summary_statistics_double(name) + message = f"Role of column {name} not recognized." 
+ raise ValueError(message) + + +def _generate_raw_summary_statistics_pd(dataframe: DataFrame | View) -> pd.DataFrame: + data = [ + _generate_raw_summary_statistics_for_column(dataframe, name) + for name in dataframe.columns + ] return pd.DataFrame(data) @@ -801,7 +855,7 @@ def mock_duckdb_execute( connection = mocker.MagicMock(DuckDBPyConnection) connection_context_manager.__enter__.return_value = connection # pyright: ignore [reportAny] _ = mocker.patch( - "getml_io.serialize.dataframe_or_view.duckdb.connect", + "getml_io.serialize.parquet.duckdb.connect", return_value=connection_context_manager, ) @@ -817,7 +871,7 @@ def mocked_df() -> pd.DataFrame: current_dataframe = dataframes_by_path[ current_parquet_path.relative_to(tmp_path) ] - return generate_raw_summary_statistics_pd(current_dataframe) + return _generate_raw_summary_statistics_pd(current_dataframe) mock_execution.df.side_effect = mocked_df # pyright: ignore [reportAny] return mock_execution @@ -847,3 +901,93 @@ def mock_from_array( "getml_io.serialize.pipeline.DataFrame.from_arrow", side_effect=mock_from_array, ) + + +def build_statistics_varchar(column_type: str) -> StatisticsType: + return { + "approx_unique": 0, + "column_type": "VARCHAR", + "count": 0, + "max": "", + "min": "", + "null_percentage": None, + "type": column_type, + } + + +def build_statistics_double(column_type: str) -> StatisticsType: + return { + "approx_unique": 0, + "avg": None, + "column_type": "DOUBLE", + "count": 0, + "max": 0.0, + "min": 0.0, + "null_percentage": None, + "q25": None, + "q50": None, + "q75": None, + "std": None, + "type": column_type, + } + + +def build_column_information_by_name() -> ColumnInformationType: + return { + "categorical0": { + "name": "categorical0", + "role": "categorical", + "statistics": build_statistics_varchar("categorical"), + }, + "join_key0": { + "name": "join_key0", + "role": "join_key", + "statistics": build_statistics_varchar("join_key"), + }, + "numerical0": { + "name": "numerical0", + 
"role": "numerical", + "statistics": build_statistics_double("numerical"), + }, + "target0": { + "name": "target0", + "role": "target", + "statistics": build_statistics_double("target"), + }, + "text0": { + "name": "text0", + "role": "text", + "statistics": build_statistics_varchar("text"), + }, + "time_stamp0": { + "name": "time_stamp0", + "role": "time_stamp", + "statistics": build_statistics_double("time_stamp_float"), + }, + "unused_float0": { + "name": "unused_float0", + "role": "unused_float", + "statistics": build_statistics_double("unused_float"), + }, + "unused_string0": { + "name": "unused_string0", + "role": "unused_string", + "statistics": build_statistics_varchar("unused_string"), + }, + } + + +def get_expected_column_information_by_name() -> dict[str, ColumnInformation]: + return { + name: ColumnInformation.model_validate(column_information) + for name, column_information in build_column_information_by_name().items() + } + + +def column_information_by_name_to_json( + column_information_by_name: Mapping[str, ColumnInformation], +) -> dict[str, str]: + return { + column_name: ColumnInformation.model_dump_json(column_information) + for column_name, column_information in column_information_by_name.items() + } diff --git a/tests/unit/metadata/test_container_information.py b/tests/unit/metadata/test_container_information.py index 8e69855..a80657e 100644 --- a/tests/unit/metadata/test_container_information.py +++ b/tests/unit/metadata/test_container_information.py @@ -4,7 +4,7 @@ from getml_io.getml.roles import Role from getml_io.metadata.container_information import ContainerInformation -from tests.unit.types import ColumnProfileType, ContainerInformationType +from tests.unit.types import ColumnInformationType, ContainerInformationType @pytest.mark.unit @@ -56,7 +56,7 @@ def _get_expected_serialized_container_information( peripheral_path: Path, subsets_path: Path, ) -> ContainerInformationType: - expected_column_profile: ColumnProfileType = { + 
expected_column_information_by_name: ColumnInformationType = { "default": { "name": "default", "role": Role.NUMERICAL, @@ -81,30 +81,30 @@ def _get_expected_serialized_container_information( "population": { "name": "population", "path": population_path / "population.parquet", - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, "peripheral": { "peripheral": { "name": "peripheral", "path": peripheral_path / "peripheral.parquet", - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, }, "subsets": { "train": { "name": "dataframe_train", "path": subsets_path / "dataframe_train.parquet", - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, "test": { "name": "dataframe_test", "path": subsets_path / "dataframe_test.parquet", - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, "validation": { "name": "dataframe_validation", "path": subsets_path / "dataframe_validation.parquet", - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, }, "deep_copy": True, diff --git a/tests/unit/metadata/test_pipeline_information.py b/tests/unit/metadata/test_pipeline_information.py index 9436e29..e555d10 100644 --- a/tests/unit/metadata/test_pipeline_information.py +++ b/tests/unit/metadata/test_pipeline_information.py @@ -6,7 +6,7 @@ from getml_io.getml.relationships import Relationship from getml_io.getml.roles import Role from getml_io.metadata.pipeline_information import LossFunction, PipelineInformation -from tests.unit.types import ColumnProfileType, PipelineInformationType +from tests.unit.types import ColumnInformationType, PipelineInformationType @pytest.mark.unit @@ -81,7 +81,7 @@ def test_serialize_model( def _get_expected_serialized_pipeline_information() -> PipelineInformationType: - expected_column_profile: ColumnProfileType = { + 
expected_column_information_by_name: ColumnInformationType = { "default": { "name": "default", "role": Role.NUMERICAL, @@ -107,24 +107,24 @@ def _get_expected_serialized_pipeline_information() -> PipelineInformationType: "test": { "name": "dataframe_test", "path": Path("pipeline/predictions/dataframe_test.parquet"), - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, "validation": { "name": "dataframe_validation", "path": Path("pipeline/predictions/dataframe_validation.parquet"), - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, }, "feature_sets": { "test": { "name": "features.test", "path": Path("pipeline/feature_sets/features.test.parquet"), - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, "validation": { "name": "features.validation", "path": Path("pipeline/feature_sets/features.validation.parquet"), - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, }, "data_model": { @@ -270,7 +270,7 @@ def _get_expected_serialized_pipeline_information() -> PipelineInformationType: "test_tag", ], "targets": [ - "test_target", + "target0", ], "features": { "3": { @@ -279,7 +279,7 @@ def _get_expected_serialized_pipeline_information() -> PipelineInformationType: "index": 1, "name": "3", "sql": "", - "target": "test_target", + "target": "target0", }, "column_01": { "correlation": -0.09593210355580734, @@ -287,7 +287,7 @@ def _get_expected_serialized_pipeline_information() -> PipelineInformationType: "index": 2, "name": "column_01", "sql": "", - "target": "test_target", + "target": "target0", }, "duration": { "correlation": -0.0271918442877901, @@ -295,7 +295,7 @@ def _get_expected_serialized_pipeline_information() -> PipelineInformationType: "index": 3, "name": "duration", "sql": "", - "target": "test_target", + "target": "target0", }, "feature_1_1": { "correlation": -0.13123721580660005, @@ 
-343,7 +343,7 @@ def _get_expected_serialized_pipeline_information() -> PipelineInformationType: "IS NULL )\n" "GROUP BY t1.rowid;" ), - "target": "test_target", + "target": "target0", }, }, "scores": [ @@ -353,7 +353,7 @@ def _get_expected_serialized_pipeline_information() -> PipelineInformationType: "cross_entropy": 0.05, "date_time": datetime(2023, 1, 1, 12, 0, tzinfo=timezone.utc), "set_used": "test_set", - "target": "test_target", + "target": "target0", "type": "classification", }, ], diff --git a/tests/unit/serialize/test_container.py b/tests/unit/serialize/test_container.py index 6129f8a..2a91118 100644 --- a/tests/unit/serialize/test_container.py +++ b/tests/unit/serialize/test_container.py @@ -9,8 +9,14 @@ assume_is_dict_str_to_dataframe_or_view, assume_is_optional_dataframe_or_view, ) -from tests.unit.conftest import MockDuckDBExecuteFactory -from tests.unit.types import ColumnProfileType, ContainerInformationType, StatisticsType +from tests.unit.conftest import ( + MockDuckDBExecuteFactory, + build_column_information_by_name, +) +from tests.unit.types import ( + ColumnInformationType, + ContainerInformationType, +) @pytest.mark.unit @@ -110,102 +116,40 @@ def _get_expected_container_information( peripheral_path: Path, subsets_path: Path, ) -> ContainerInformationType: - expected_statistics_categorical = { - "approx_unique": 0, - "column_type": "VARCHAR", - "count": 0, - "max": "", - "min": "", - "null_percentage": None, - "type": "categorical", - } - expected_statistics_numerical: StatisticsType = { - "approx_unique": 0, - "avg": None, - "column_type": "DOUBLE", - "count": 0, - "max": 0.0, - "min": 0.0, - "null_percentage": None, - "q25": None, - "q50": None, - "q75": None, - "std": None, - "type": "numerical", - } - expected_column_profile: ColumnProfileType = { - "Categorical0": { - "name": "Categorical0", - "role": "categorical", - "statistics": expected_statistics_categorical, - }, - "Categorical1": { - "name": "Categorical1", - "role": 
"categorical", - "statistics": expected_statistics_categorical, - }, - "Categorical2": { - "name": "Categorical2", - "role": "categorical", - "statistics": expected_statistics_categorical, - }, - "Categorical3": { - "name": "Categorical3", - "role": "categorical", - "statistics": expected_statistics_categorical, - }, - "Numerical0": { - "name": "Numerical0", - "role": "numerical", - "statistics": expected_statistics_numerical, - }, - "Numerical1": { - "name": "Numerical1", - "role": "numerical", - "statistics": expected_statistics_numerical, - }, - "Numerical2": { - "name": "Numerical2", - "role": "numerical", - "statistics": expected_statistics_numerical, - }, - "Numerical3": { - "name": "Numerical3", - "role": "numerical", - "statistics": expected_statistics_numerical, - }, - } + expected_column_information_by_name: ColumnInformationType = ( + build_column_information_by_name() + ) return { "id": "mock_container_id", "population": { "name": "mock_population_name", "path": str(population_path / "mock_population_name.parquet"), - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, "peripheral": { "mock_peripheral_name": { "name": "mock_peripheral_name", "path": str(peripheral_path / "mock_peripheral_name.parquet"), - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, }, "subsets": { "test": { "name": "mock_dataframe_test", "path": str(subsets_path / "test.mock_dataframe_test.parquet"), - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, "train": { "name": "mock_dataframe_train", "path": str(subsets_path / "train.mock_dataframe_train.parquet"), - "column_profile": expected_column_profile, + "columns": expected_column_information_by_name, }, "validation": { "name": "mock_dataframe_validation", "path": str( subsets_path / "validation.mock_dataframe_validation.parquet", ), - "column_profile": expected_column_profile, + "columns": 
expected_column_information_by_name, }, }, "deep_copy": False, diff --git a/tests/unit/serialize/test_dataframe_or_view.py b/tests/unit/serialize/test_dataframe_or_view.py index a5b8120..92a01c0 100644 --- a/tests/unit/serialize/test_dataframe_or_view.py +++ b/tests/unit/serialize/test_dataframe_or_view.py @@ -1,23 +1,21 @@ -from collections.abc import Mapping from pathlib import Path import pytest import pytest_mock from getml.data import DataFrame -from getml_io.getml.roles import Role -from getml_io.metadata.dataframe_information import ( - ColumnProfile, - ColumnStatisticsCategorical, - ColumnStatisticsNumerical, +from getml_io.serialize.dataframe_or_view import ( + serialize_dataframe_or_view, ) -from getml_io.serialize.dataframe_or_view import serialize_dataframe_or_view from getml_io.serialize.exception import ( DataFrameParquetStorageError, - UnsupportedColumnStatisticsError, ) from getml_io.utils.exception import StorageDirectoryCreationError -from tests.unit.conftest import MockDuckDBExecuteFactory +from tests.unit.conftest import ( + MockDuckDBExecuteFactory, + column_information_by_name_to_json, + get_expected_column_information_by_name, +) @pytest.mark.unit @@ -49,88 +47,10 @@ def test_serialize_dataframe_or_view( assert serialized_info.path == expected_parquet_path mock_dataframe.to_parquet.assert_called_once_with(str(expected_parquet_path)) - expected_column_profile = _get_expected_column_profile() - assert column_profiles_to_json( - serialized_info.column_profile, - ) == column_profiles_to_json(expected_column_profile) - - -def _get_expected_column_profile() -> dict[str, ColumnProfile]: - nan = float("nan") - expected_statistics_categorical = ColumnStatisticsCategorical( - count=0, - approx_unique=0, - min="", - max="", - null_percentage=nan, - column_type="VARCHAR", - type="categorical", - ) - expected_statistics_numerical = ColumnStatisticsNumerical( - count=0, - approx_unique=0, - avg=nan, - min=0.0, - max=0.0, - q25=nan, - q50=nan, - q75=nan, - 
std=nan, - null_percentage=nan, - column_type="DOUBLE", - type="numerical", - ) - return { - "Categorical0": ColumnProfile( - name="Categorical0", - role=Role.CATEGORICAL, - statistics=expected_statistics_categorical, - ), - "Categorical1": ColumnProfile( - name="Categorical1", - role=Role.CATEGORICAL, - statistics=expected_statistics_categorical, - ), - "Categorical2": ColumnProfile( - name="Categorical2", - role=Role.CATEGORICAL, - statistics=expected_statistics_categorical, - ), - "Categorical3": ColumnProfile( - name="Categorical3", - role=Role.CATEGORICAL, - statistics=expected_statistics_categorical, - ), - "Numerical0": ColumnProfile( - name="Numerical0", - role=Role.NUMERICAL, - statistics=expected_statistics_numerical, - ), - "Numerical1": ColumnProfile( - name="Numerical1", - role=Role.NUMERICAL, - statistics=expected_statistics_numerical, - ), - "Numerical2": ColumnProfile( - name="Numerical2", - role=Role.NUMERICAL, - statistics=expected_statistics_numerical, - ), - "Numerical3": ColumnProfile( - name="Numerical3", - role=Role.NUMERICAL, - statistics=expected_statistics_numerical, - ), - } - - -def column_profiles_to_json( - column_profiles_by_name: Mapping[str, ColumnProfile], -) -> dict[str, str]: - return { - column_name: ColumnProfile.model_dump_json(column_profile) - for column_name, column_profile in column_profiles_by_name.items() - } + expected_column_information_by_name = get_expected_column_information_by_name() + assert column_information_by_name_to_json( + serialized_info.columns, + ) == column_information_by_name_to_json(expected_column_information_by_name) @pytest.mark.unit @@ -167,33 +87,3 @@ def test_serialize_dataframe_or_view_storage_error( match=r"Failed to store DataFrame as parquet 'mock_dataframe_name' at path", ): _ = serialize_dataframe_or_view(mock_dataframe, target_storage_directory) - - -@pytest.mark.unit -def test_serialize_dataframe_or_view_statistics_role_error( - tmp_path: Path, - mocker: pytest_mock.MockerFixture, - 
mock_dataframe: DataFrame, - mock_duckdb_execute_factory: MockDuckDBExecuteFactory, -) -> None: - # Given - mock_duckdb_execute_factory( - { - Path(f"dataframes/{mock_dataframe.name}.parquet"): mock_dataframe, - }, - ) - - target_storage_directory = tmp_path / "dataframes" - mock_dataframe.to_parquet = mocker.Mock() - _ = mocker.patch.object(mock_dataframe.roles, "column", return_value=Role.TEXT) - _ = mocker.patch.object(mock_dataframe, "columns", ["Text0"]) - - # When / Then - with pytest.raises( - UnsupportedColumnStatisticsError, - match=( - rf"Column 'Text0' in dataframe 'mock_dataframe_name' " - rf"has an unsupported role: {Role.TEXT!r} and type: VARCHAR" - ), - ): - _ = serialize_dataframe_or_view(mock_dataframe, target_storage_directory) diff --git a/tests/unit/serialize/test_parquet.py b/tests/unit/serialize/test_parquet.py new file mode 100644 index 0000000..7130169 --- /dev/null +++ b/tests/unit/serialize/test_parquet.py @@ -0,0 +1,157 @@ +from pathlib import Path + +import getml.data.roles as getml_roles +import pytest +from getml.data import DataFrame +from getml.data.roles.types import Role as GetMLRole + +from getml_io.getml.roles import Role +from getml_io.metadata.dataframe_information import ( + ROLE_TO_COLUMN_STATISTICS_TYPE_MAPPING, + ColumnStatistics, + ColumnStatisticsCategorical, + ColumnStatisticsJoinKey, + ColumnStatisticsNumerical, + ColumnStatisticsTarget, + ColumnStatisticsText, + ColumnStatisticsTimeStamp, + ColumnStatisticsTimeStampAsFloat, + ColumnStatisticsUnusedFloat, + ColumnStatisticsUnusedString, + ColumnType, +) +from getml_io.serialize.exception import ( + UnsupportedColumnStatisticsError, +) +from getml_io.serialize.parquet import ( + _get_column_statistics_type, # pyright: ignore[reportPrivateUsage] + serialize_dataframe, +) +from getml_io.serialize.roles import serialize_role +from getml_io.utils.convert import assume_is_str +from tests.unit.conftest import ( + MockDuckDBExecuteFactory, + column_information_by_name_to_json, 
+ get_expected_column_information_by_name, +) + + +@pytest.mark.unit +@pytest.mark.parametrize( + ("getml_role", "column_type"), + [ + (getml_roles.categorical, ColumnType.DOUBLE), + (getml_roles.categorical, ColumnType.TIMESTAMP_NS), + (getml_roles.join_key, ColumnType.DOUBLE), + (getml_roles.join_key, ColumnType.TIMESTAMP_NS), + (getml_roles.text, ColumnType.DOUBLE), + (getml_roles.text, ColumnType.TIMESTAMP_NS), + (getml_roles.unused_string, ColumnType.DOUBLE), + (getml_roles.unused_string, ColumnType.TIMESTAMP_NS), + (getml_roles.numerical, ColumnType.VARCHAR), + (getml_roles.numerical, ColumnType.TIMESTAMP_NS), + (getml_roles.target, ColumnType.VARCHAR), + (getml_roles.target, ColumnType.TIMESTAMP_NS), + (getml_roles.time_stamp, ColumnType.VARCHAR), + (getml_roles.unused_float, ColumnType.VARCHAR), + (getml_roles.unused_float, ColumnType.TIMESTAMP_NS), + ], +) +def test__get_column_statistics_type_error( + getml_role: GetMLRole, + column_type: ColumnType, +) -> None: + # Given + column_name = "TestColumn" + dataframe_name = "mock_dataframe_name" + role = serialize_role(getml_role) + + # When / Then + with pytest.raises( + UnsupportedColumnStatisticsError, + match=( + rf"Column '{column_name}' in dataframe '{dataframe_name}' " + rf"has an unsupported role: {role!r} and type: {column_type.value}" + ), + ): + _ = _get_column_statistics_type( + dataframe_name, + column_name, + getml_role, + column_type.value, + ) + + assert (role, column_type) not in ROLE_TO_COLUMN_STATISTICS_TYPE_MAPPING + + +@pytest.mark.unit +@pytest.mark.parametrize( + ("getml_role", "column_type", "expected_column_statistics_type"), + [ + (Role.CATEGORICAL, ColumnType.VARCHAR, ColumnStatisticsCategorical), + (Role.JOIN_KEY, ColumnType.VARCHAR, ColumnStatisticsJoinKey), + (Role.NUMERICAL, ColumnType.DOUBLE, ColumnStatisticsNumerical), + (Role.TARGET, ColumnType.DOUBLE, ColumnStatisticsTarget), + (Role.TIME_STAMP, ColumnType.TIMESTAMP_NS, ColumnStatisticsTimeStamp), + (Role.TIME_STAMP, 
ColumnType.DOUBLE, ColumnStatisticsTimeStampAsFloat), + (Role.TEXT, ColumnType.VARCHAR, ColumnStatisticsText), + (Role.UNUSED_FLOAT, ColumnType.DOUBLE, ColumnStatisticsUnusedFloat), + (Role.UNUSED_STRING, ColumnType.VARCHAR, ColumnStatisticsUnusedString), + ], +) +def test__get_column_statistics_type( + getml_role: GetMLRole, + column_type: ColumnType, + expected_column_statistics_type: type[ColumnStatistics], +) -> None: + # Given + column_name = "TestColumn" + dataframe_name = "mock_dataframe_name" + role = serialize_role(getml_role) + + # When + column_statistics_type = _get_column_statistics_type( + dataframe_name, + column_name, + getml_role, + column_type.value, + ) + + # Then + assert column_statistics_type is expected_column_statistics_type + assert (role, column_type) in ROLE_TO_COLUMN_STATISTICS_TYPE_MAPPING + + +@pytest.mark.unit +def test_serialize_dataframe( + tmp_path: Path, + mock_dataframe: DataFrame, + mock_duckdb_execute_factory: MockDuckDBExecuteFactory, +) -> None: + # Given + mock_duckdb_execute_factory( + { + Path(f"dataframes/{mock_dataframe.name}.parquet"): mock_dataframe, + }, + ) + + target_storage_directory = tmp_path / "dataframes" + + # When + serialized_info = serialize_dataframe( + target_storage_directory=target_storage_directory, + save_parquet=(lambda _: None), + dataframe_name=assume_is_str(mock_dataframe.name), + get_getml_role_by_column=mock_dataframe.roles.column, + column_names=mock_dataframe.columns, + ) + + # Then + expected_parquet_path = target_storage_directory / "mock_dataframe_name.parquet" + assert serialized_info.name == "mock_dataframe_name" + assert serialized_info.path == expected_parquet_path + + expected_column_information_by_name = get_expected_column_information_by_name() + assert column_information_by_name_to_json( + serialized_info.columns, + ) == column_information_by_name_to_json(expected_column_information_by_name) diff --git a/tests/unit/serialize/test_pipeline.py b/tests/unit/serialize/test_pipeline.py 
index ae0cb55..3403aa8 100644 --- a/tests/unit/serialize/test_pipeline.py +++ b/tests/unit/serialize/test_pipeline.py @@ -132,7 +132,7 @@ def test_serialize_pipeline( # noqa: PLR0913 assert pipeline_information.share_selected_features == 0.0 assert pipeline_information.tags == ["test_tag"] - assert pipeline_information.targets == ["test_target"] + assert pipeline_information.targets == ["target0"] assert pipeline_information.data_model is not None assert pipeline_information.data_model.population.name == "placeholder_population" @@ -386,7 +386,7 @@ def test_serialize_scores_regression(mock_scores_regression: GetMLScores) -> Non assert score.type == "regression" assert score.date_time == datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc) assert score.set_used == "test_set" - assert score.target == "test_target" + assert score.target == "target0" assert score.mae == 0.1 # noqa: PLR2004 assert score.rmse == 0.2 # noqa: PLR2004 assert score.rsquared == 0.8 # noqa: PLR2004 @@ -420,7 +420,7 @@ def test_serialize_scores_classification( assert score.type == "classification" assert score.date_time == datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc) assert score.set_used == "test_set" - assert score.target == "test_target" + assert score.target == "target0" assert score.accuracy == 0.1 # noqa: PLR2004 assert score.auc == 0.2 # noqa: PLR2004 assert score.cross_entropy == 0.8 # noqa: PLR2004 diff --git a/tests/unit/types.py b/tests/unit/types.py index 9d21f1e..f1f266b 100644 --- a/tests/unit/types.py +++ b/tests/unit/types.py @@ -8,8 +8,8 @@ from getml_io.metadata.pipeline_information import LossFunction StatisticsType = Mapping[str, str | float | int | None] -ColumnProfileType = Mapping[str, Mapping[str, str | StatisticsType]] -DataFrameInformationType = Mapping[str, str | Path | ColumnProfileType] +ColumnInformationType = Mapping[str, Mapping[str, str | StatisticsType]] +DataFrameInformationType = Mapping[str, str | Path | ColumnInformationType] JoinInformationType = 
Mapping[ str, "float | bool | Sequence[tuple[str, str]] | Relationship | PlaceholderInformationType | str", # noqa: E501 @@ -37,7 +37,7 @@ ] PredictionType = Mapping[str, Mapping[str, str | Path]] -FeatureSetType = Mapping[str, Mapping[str, str | Path | ColumnProfileType]] +FeatureSetType = Mapping[str, Mapping[str, str | Path | ColumnInformationType]] FeatureLearnerType = Mapping[str, float | str | int | bool | Sequence[str]] PredictorType = Mapping[str, float | str] FeatureType = Mapping[str, str | float | int]